linux-arm-kernel.lists.infradead.org archive mirror
 help / color / mirror / Atom feed
* [net v3] net: wwan: t7xx: Fix FSM command timeout issue
@ 2024-12-24  4:15 Jinjian Song
  2024-12-24  7:55 ` Sergey Ryazanov
  2024-12-31  2:10 ` patchwork-bot+netdevbpf
  0 siblings, 2 replies; 3+ messages in thread
From: Jinjian Song @ 2024-12-24  4:15 UTC (permalink / raw)
  To: chandrashekar.devegowda, chiranjeevi.rapolu, haijun.liu,
	m.chetan.kumar, ricardo.martinez, loic.poulain, ryazanov.s.a,
	johannes, davem, edumazet, kuba, pabeni
  Cc: linux-kernel, netdev, linux-doc, angelogioacchino.delregno,
	linux-arm-kernel, matthias.bgg, corbet, linux-mediatek, helgaas,
	danielwinkler, korneld, andrew+netdev, horms, Jinjian Song

When driver processes the internal state change command, it use an
asynchronous thread to process the command operation. If the main
thread detects that the task has timed out, the asynchronous thread
will panic when executing the completion notification because the
main thread completion object has been released.

BUG: unable to handle page fault for address: fffffffffffffff8
PGD 1f283a067 P4D 1f283a067 PUD 1f283c067 PMD 0
Oops: 0000 [#1] PREEMPT SMP NOPTI
RIP: 0010:complete_all+0x3e/0xa0
[...]
Call Trace:
 <TASK>
 ? __die_body+0x68/0xb0
 ? page_fault_oops+0x379/0x3e0
 ? exc_page_fault+0x69/0xa0
 ? asm_exc_page_fault+0x22/0x30
 ? complete_all+0x3e/0xa0
 fsm_main_thread+0xa3/0x9c0 [mtk_t7xx (HASH:1400 5)]
 ? __pfx_autoremove_wake_function+0x10/0x10
 kthread+0xd8/0x110
 ? __pfx_fsm_main_thread+0x10/0x10 [mtk_t7xx (HASH:1400 5)]
 ? __pfx_kthread+0x10/0x10
 ret_from_fork+0x38/0x50
 ? __pfx_kthread+0x10/0x10
 ret_from_fork_asm+0x1b/0x30
 </TASK>
[...]
CR2: fffffffffffffff8
---[ end trace 0000000000000000 ]---

Use the reference counter to ensure safe release as Sergey suggests:
https://lore.kernel.org/all/da90f64c-260a-4329-87bf-1f9ff20a5951@gmail.com/

Fixes: 13e920d93e37 ("net: wwan: t7xx: Add core components")
Signed-off-by: Jinjian Song <jinjian.song@fibocom.com>
---
 drivers/net/wwan/t7xx/t7xx_state_monitor.c | 26 ++++++++++++++--------
 drivers/net/wwan/t7xx/t7xx_state_monitor.h |  5 +++--
 2 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/drivers/net/wwan/t7xx/t7xx_state_monitor.c b/drivers/net/wwan/t7xx/t7xx_state_monitor.c
index 3931c7a13f5a..cbdbb91e8381 100644
--- a/drivers/net/wwan/t7xx/t7xx_state_monitor.c
+++ b/drivers/net/wwan/t7xx/t7xx_state_monitor.c
@@ -104,14 +104,21 @@ void t7xx_fsm_broadcast_state(struct t7xx_fsm_ctl *ctl, enum md_state state)
 	fsm_state_notify(ctl->md, state);
 }
 
+static void fsm_release_command(struct kref *ref)
+{
+	struct t7xx_fsm_command *cmd = container_of(ref, typeof(*cmd), refcnt);
+
+	kfree(cmd);
+}
+
 static void fsm_finish_command(struct t7xx_fsm_ctl *ctl, struct t7xx_fsm_command *cmd, int result)
 {
 	if (cmd->flag & FSM_CMD_FLAG_WAIT_FOR_COMPLETION) {
-		*cmd->ret = result;
-		complete_all(cmd->done);
+		cmd->result = result;
+		complete_all(&cmd->done);
 	}
 
-	kfree(cmd);
+	kref_put(&cmd->refcnt, fsm_release_command);
 }
 
 static void fsm_del_kf_event(struct t7xx_fsm_event *event)
@@ -475,7 +482,6 @@ static int fsm_main_thread(void *data)
 
 int t7xx_fsm_append_cmd(struct t7xx_fsm_ctl *ctl, enum t7xx_fsm_cmd_state cmd_id, unsigned int flag)
 {
-	DECLARE_COMPLETION_ONSTACK(done);
 	struct t7xx_fsm_command *cmd;
 	unsigned long flags;
 	int ret;
@@ -487,11 +493,13 @@ int t7xx_fsm_append_cmd(struct t7xx_fsm_ctl *ctl, enum t7xx_fsm_cmd_state cmd_id
 	INIT_LIST_HEAD(&cmd->entry);
 	cmd->cmd_id = cmd_id;
 	cmd->flag = flag;
+	kref_init(&cmd->refcnt);
 	if (flag & FSM_CMD_FLAG_WAIT_FOR_COMPLETION) {
-		cmd->done = &done;
-		cmd->ret = &ret;
+		init_completion(&cmd->done);
+		kref_get(&cmd->refcnt);
 	}
 
+	kref_get(&cmd->refcnt);
 	spin_lock_irqsave(&ctl->command_lock, flags);
 	list_add_tail(&cmd->entry, &ctl->command_queue);
 	spin_unlock_irqrestore(&ctl->command_lock, flags);
@@ -501,11 +509,11 @@ int t7xx_fsm_append_cmd(struct t7xx_fsm_ctl *ctl, enum t7xx_fsm_cmd_state cmd_id
 	if (flag & FSM_CMD_FLAG_WAIT_FOR_COMPLETION) {
 		unsigned long wait_ret;
 
-		wait_ret = wait_for_completion_timeout(&done,
+		wait_ret = wait_for_completion_timeout(&cmd->done,
 						       msecs_to_jiffies(FSM_CMD_TIMEOUT_MS));
-		if (!wait_ret)
-			return -ETIMEDOUT;
 
+		ret = wait_ret ? cmd->result : -ETIMEDOUT;
+		kref_put(&cmd->refcnt, fsm_release_command);
 		return ret;
 	}
 
diff --git a/drivers/net/wwan/t7xx/t7xx_state_monitor.h b/drivers/net/wwan/t7xx/t7xx_state_monitor.h
index 7b0a9baf488c..6e0601bb752e 100644
--- a/drivers/net/wwan/t7xx/t7xx_state_monitor.h
+++ b/drivers/net/wwan/t7xx/t7xx_state_monitor.h
@@ -110,8 +110,9 @@ struct t7xx_fsm_command {
 	struct list_head	entry;
 	enum t7xx_fsm_cmd_state	cmd_id;
 	unsigned int		flag;
-	struct completion	*done;
-	int			*ret;
+	struct completion	done;
+	int			result;
+	struct kref		refcnt;
 };
 
 struct t7xx_fsm_notifier {
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [net v3] net: wwan: t7xx: Fix FSM command timeout issue
  2024-12-24  4:15 [net v3] net: wwan: t7xx: Fix FSM command timeout issue Jinjian Song
@ 2024-12-24  7:55 ` Sergey Ryazanov
  2024-12-31  2:10 ` patchwork-bot+netdevbpf
  1 sibling, 0 replies; 3+ messages in thread
From: Sergey Ryazanov @ 2024-12-24  7:55 UTC (permalink / raw)
  To: Jinjian Song, chandrashekar.devegowda, chiranjeevi.rapolu,
	haijun.liu, m.chetan.kumar, ricardo.martinez, loic.poulain,
	johannes, davem, edumazet, kuba, pabeni
  Cc: linux-kernel, netdev, linux-doc, angelogioacchino.delregno,
	linux-arm-kernel, matthias.bgg, corbet, linux-mediatek, helgaas,
	danielwinkler, korneld, andrew+netdev, horms

On December 24, 2024 6:15:52 AM GMT+02:00, Jinjian Song <jinjian.song@fibocom.com> wrote:
>When driver processes the internal state change command, it use an
>asynchronous thread to process the command operation. If the main
>thread detects that the task has timed out, the asynchronous thread
>will panic when executing the completion notification because the
>main thread completion object has been released.
>
>BUG: unable to handle page fault for address: fffffffffffffff8
>PGD 1f283a067 P4D 1f283a067 PUD 1f283c067 PMD 0
>Oops: 0000 [#1] PREEMPT SMP NOPTI
>RIP: 0010:complete_all+0x3e/0xa0
>[...]
>Call Trace:
> <TASK>
> ? __die_body+0x68/0xb0
> ? page_fault_oops+0x379/0x3e0
> ? exc_page_fault+0x69/0xa0
> ? asm_exc_page_fault+0x22/0x30
> ? complete_all+0x3e/0xa0
> fsm_main_thread+0xa3/0x9c0 [mtk_t7xx (HASH:1400 5)]
> ? __pfx_autoremove_wake_function+0x10/0x10
> kthread+0xd8/0x110
> ? __pfx_fsm_main_thread+0x10/0x10 [mtk_t7xx (HASH:1400 5)]
> ? __pfx_kthread+0x10/0x10
> ret_from_fork+0x38/0x50
> ? __pfx_kthread+0x10/0x10
> ret_from_fork_asm+0x1b/0x30
> </TASK>
>[...]
>CR2: fffffffffffffff8
>---[ end trace 0000000000000000 ]---
>
>Use the reference counter to ensure safe release as Sergey suggests:
>https://lore.kernel.org/all/da90f64c-260a-4329-87bf-1f9ff20a5951@gmail.com/
>
>Fixes: 13e920d93e37 ("net: wwan: t7xx: Add core components")
>Signed-off-by: Jinjian Song <jinjian.song@fibocom.com>

Acked-by: Sergey Ryazanov <ryazanov.s.a@gmail.com>


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [net v3] net: wwan: t7xx: Fix FSM command timeout issue
  2024-12-24  4:15 [net v3] net: wwan: t7xx: Fix FSM command timeout issue Jinjian Song
  2024-12-24  7:55 ` Sergey Ryazanov
@ 2024-12-31  2:10 ` patchwork-bot+netdevbpf
  1 sibling, 0 replies; 3+ messages in thread
From: patchwork-bot+netdevbpf @ 2024-12-31  2:10 UTC (permalink / raw)
  To: Jinjian Song
  Cc: chandrashekar.devegowda, chiranjeevi.rapolu, haijun.liu,
	m.chetan.kumar, ricardo.martinez, loic.poulain, ryazanov.s.a,
	johannes, davem, edumazet, kuba, pabeni, linux-kernel, netdev,
	linux-doc, angelogioacchino.delregno, linux-arm-kernel,
	matthias.bgg, corbet, linux-mediatek, helgaas, danielwinkler,
	korneld, andrew+netdev, horms

Hello:

This patch was applied to netdev/net.git (main)
by Jakub Kicinski <kuba@kernel.org>:

On Tue, 24 Dec 2024 12:15:52 +0800 you wrote:
> When driver processes the internal state change command, it use an
> asynchronous thread to process the command operation. If the main
> thread detects that the task has timed out, the asynchronous thread
> will panic when executing the completion notification because the
> main thread completion object has been released.
> 
> BUG: unable to handle page fault for address: fffffffffffffff8
> PGD 1f283a067 P4D 1f283a067 PUD 1f283c067 PMD 0
> Oops: 0000 [#1] PREEMPT SMP NOPTI
> RIP: 0010:complete_all+0x3e/0xa0
> [...]
> Call Trace:
>  <TASK>
>  ? __die_body+0x68/0xb0
>  ? page_fault_oops+0x379/0x3e0
>  ? exc_page_fault+0x69/0xa0
>  ? asm_exc_page_fault+0x22/0x30
>  ? complete_all+0x3e/0xa0
>  fsm_main_thread+0xa3/0x9c0 [mtk_t7xx (HASH:1400 5)]
>  ? __pfx_autoremove_wake_function+0x10/0x10
>  kthread+0xd8/0x110
>  ? __pfx_fsm_main_thread+0x10/0x10 [mtk_t7xx (HASH:1400 5)]
>  ? __pfx_kthread+0x10/0x10
>  ret_from_fork+0x38/0x50
>  ? __pfx_kthread+0x10/0x10
>  ret_from_fork_asm+0x1b/0x30
>  </TASK>
> [...]
> CR2: fffffffffffffff8
> 
> [...]

Here is the summary with links:
  - [net,v3] net: wwan: t7xx: Fix FSM command timeout issue
    https://git.kernel.org/netdev/net/c/4f619d518db9

You are awesome, thank you!
-- 
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html




^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2024-12-31  2:11 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-12-24  4:15 [net v3] net: wwan: t7xx: Fix FSM command timeout issue Jinjian Song
2024-12-24  7:55 ` Sergey Ryazanov
2024-12-31  2:10 ` patchwork-bot+netdevbpf

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).