linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] optee: ffa: fix sleep in atomic context
@ 2025-06-02 12:04 Jens Wiklander
  2025-06-02 13:09 ` Jens Wiklander
                   ` (3 more replies)
  0 siblings, 4 replies; 7+ messages in thread
From: Jens Wiklander @ 2025-06-02 12:04 UTC (permalink / raw)
  To: linux-kernel, op-tee
  Cc: Sumit Garg, Jerome Forissier, Sudeep Holla, Jens Wiklander

The OP-TEE driver registers the function notif_callback() for FF-A
notifications. However, this function is called in an atomic context
leading to errors like this when processing asynchronous notifications:

 | BUG: sleeping function called from invalid context at kernel/locking/mutex.c:258
 | in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 9, name: kworker/0:0
 | preempt_count: 1, expected: 0
 | RCU nest depth: 0, expected: 0
 | CPU: 0 UID: 0 PID: 9 Comm: kworker/0:0 Not tainted 6.14.0-00019-g657536ebe0aa #13
 | Hardware name: linux,dummy-virt (DT)
 | Workqueue: ffa_pcpu_irq_notification notif_pcpu_irq_work_fn
 | Call trace:
 |  show_stack+0x18/0x24 (C)
 |  dump_stack_lvl+0x78/0x90
 |  dump_stack+0x18/0x24
 |  __might_resched+0x114/0x170
 |  __might_sleep+0x48/0x98
 |  mutex_lock+0x24/0x80
 |  optee_get_msg_arg+0x7c/0x21c
 |  simple_call_with_arg+0x50/0xc0
 |  optee_do_bottom_half+0x14/0x20
 |  notif_callback+0x3c/0x48
 |  handle_notif_callbacks+0x9c/0xe0
 |  notif_get_and_handle+0x40/0x88
 |  generic_exec_single+0x80/0xc0
 |  smp_call_function_single+0xfc/0x1a0
 |  notif_pcpu_irq_work_fn+0x2c/0x38
 |  process_one_work+0x14c/0x2b4
 |  worker_thread+0x2e4/0x3e0
 |  kthread+0x13c/0x210
 |  ret_from_fork+0x10/0x20

Fix this by adding a workqueue to process the notification in a
non-atomic context.

Fixes: d0476a59de06 ("optee: ffa_abi: add asynchronous notifications")
Signed-off-by: Jens Wiklander <jens.wiklander@linaro.org>
---
 drivers/tee/optee/ffa_abi.c       | 41 ++++++++++++++++++++++++-------
 drivers/tee/optee/optee_private.h |  2 ++
 2 files changed, 34 insertions(+), 9 deletions(-)

diff --git a/drivers/tee/optee/ffa_abi.c b/drivers/tee/optee/ffa_abi.c
index f3af5666bb11..f9ef7d94cebd 100644
--- a/drivers/tee/optee/ffa_abi.c
+++ b/drivers/tee/optee/ffa_abi.c
@@ -728,12 +728,21 @@ static bool optee_ffa_exchange_caps(struct ffa_device *ffa_dev,
 	return true;
 }
 
+static void notif_work_fn(struct work_struct *work)
+{
+	struct optee_ffa *optee_ffa = container_of(work, struct optee_ffa,
+						   notif_work);
+	struct optee *optee = container_of(optee_ffa, struct optee, ffa);
+
+	optee_do_bottom_half(optee->ctx);
+}
+
 static void notif_callback(int notify_id, void *cb_data)
 {
 	struct optee *optee = cb_data;
 
 	if (notify_id == optee->ffa.bottom_half_value)
-		optee_do_bottom_half(optee->ctx);
+		queue_work(optee->ffa.notif_wq, &optee->ffa.notif_work);
 	else
 		optee_notif_send(optee, notify_id);
 }
@@ -817,9 +826,11 @@ static void optee_ffa_remove(struct ffa_device *ffa_dev)
 	struct optee *optee = ffa_dev_get_drvdata(ffa_dev);
 	u32 bottom_half_id = optee->ffa.bottom_half_value;
 
-	if (bottom_half_id != U32_MAX)
+	if (bottom_half_id != U32_MAX) {
 		ffa_dev->ops->notifier_ops->notify_relinquish(ffa_dev,
 							      bottom_half_id);
+		destroy_workqueue(optee->ffa.notif_wq);
+	}
 	optee_remove_common(optee);
 
 	mutex_destroy(&optee->ffa.mutex);
@@ -835,6 +846,13 @@ static int optee_ffa_async_notif_init(struct ffa_device *ffa_dev,
 	u32 notif_id = 0;
 	int rc;
 
+	INIT_WORK(&optee->ffa.notif_work, notif_work_fn);
+	optee->ffa.notif_wq = create_workqueue("optee_notification");
+	if (!optee->ffa.notif_wq) {
+		rc = -EINVAL;
+		goto err;
+	}
+
 	while (true) {
 		rc = ffa_dev->ops->notifier_ops->notify_request(ffa_dev,
 								is_per_vcpu,
@@ -851,19 +869,24 @@ static int optee_ffa_async_notif_init(struct ffa_device *ffa_dev,
 		 * notifications in that case.
 		 */
 		if (rc != -EACCES)
-			return rc;
+			goto err_wq;
 		notif_id++;
 		if (notif_id >= OPTEE_FFA_MAX_ASYNC_NOTIF_VALUE)
-			return rc;
+			goto err_wq;
 	}
 	optee->ffa.bottom_half_value = notif_id;
 
 	rc = enable_async_notif(optee);
-	if (rc < 0) {
-		ffa_dev->ops->notifier_ops->notify_relinquish(ffa_dev,
-							      notif_id);
-		optee->ffa.bottom_half_value = U32_MAX;
-	}
+	if (rc < 0)
+		goto err_rel;
+
+	return 0;
+err_rel:
+	ffa_dev->ops->notifier_ops->notify_relinquish(ffa_dev, notif_id);
+err_wq:
+	destroy_workqueue(optee->ffa.notif_wq);
+err:
+	optee->ffa.bottom_half_value = U32_MAX;
 
 	return rc;
 }
diff --git a/drivers/tee/optee/optee_private.h b/drivers/tee/optee/optee_private.h
index dc0f355ef72a..9526087f0e68 100644
--- a/drivers/tee/optee/optee_private.h
+++ b/drivers/tee/optee/optee_private.h
@@ -165,6 +165,8 @@ struct optee_ffa {
 	/* Serializes access to @global_ids */
 	struct mutex mutex;
 	struct rhashtable global_ids;
+	struct workqueue_struct *notif_wq;
+	struct work_struct notif_work;
 };
 
 struct optee;
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] optee: ffa: fix sleep in atomic context
  2025-06-02 12:04 [PATCH] optee: ffa: fix sleep in atomic context Jens Wiklander
@ 2025-06-02 13:09 ` Jens Wiklander
  2025-06-05 12:39 ` Sudeep Holla
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 7+ messages in thread
From: Jens Wiklander @ 2025-06-02 13:09 UTC (permalink / raw)
  To: linux-kernel, op-tee; +Cc: Jerome Forissier, Sudeep Holla, Sumit Garg

[CC Sumit Garg <sumit.garg@kernel.org>]

Cheers,
Jens

On Mon, Jun 2, 2025 at 2:05 PM Jens Wiklander <jens.wiklander@linaro.org> wrote:
>
> The OP-TEE driver registers the function notif_callback() for FF-A
> notifications. However, this function is called in an atomic context
> leading to errors like this when processing asynchronous notifications:
>
>  | BUG: sleeping function called from invalid context at kernel/locking/mutex.c:258
>  | in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 9, name: kworker/0:0
>  | preempt_count: 1, expected: 0
>  | RCU nest depth: 0, expected: 0
>  | CPU: 0 UID: 0 PID: 9 Comm: kworker/0:0 Not tainted 6.14.0-00019-g657536ebe0aa #13
>  | Hardware name: linux,dummy-virt (DT)
>  | Workqueue: ffa_pcpu_irq_notification notif_pcpu_irq_work_fn
>  | Call trace:
>  |  show_stack+0x18/0x24 (C)
>  |  dump_stack_lvl+0x78/0x90
>  |  dump_stack+0x18/0x24
>  |  __might_resched+0x114/0x170
>  |  __might_sleep+0x48/0x98
>  |  mutex_lock+0x24/0x80
>  |  optee_get_msg_arg+0x7c/0x21c
>  |  simple_call_with_arg+0x50/0xc0
>  |  optee_do_bottom_half+0x14/0x20
>  |  notif_callback+0x3c/0x48
>  |  handle_notif_callbacks+0x9c/0xe0
>  |  notif_get_and_handle+0x40/0x88
>  |  generic_exec_single+0x80/0xc0
>  |  smp_call_function_single+0xfc/0x1a0
>  |  notif_pcpu_irq_work_fn+0x2c/0x38
>  |  process_one_work+0x14c/0x2b4
>  |  worker_thread+0x2e4/0x3e0
>  |  kthread+0x13c/0x210
>  |  ret_from_fork+0x10/0x20
>
> Fix this by adding work queue to process the notification in a
> non-atomic context.
>
> Fixes: d0476a59de06 ("optee: ffa_abi: add asynchronous notifications")
> Signed-off-by: Jens Wiklander <jens.wiklander@linaro.org>
> ---
>  drivers/tee/optee/ffa_abi.c       | 41 ++++++++++++++++++++++++-------
>  drivers/tee/optee/optee_private.h |  2 ++
>  2 files changed, 34 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/tee/optee/ffa_abi.c b/drivers/tee/optee/ffa_abi.c
> index f3af5666bb11..f9ef7d94cebd 100644
> --- a/drivers/tee/optee/ffa_abi.c
> +++ b/drivers/tee/optee/ffa_abi.c
> @@ -728,12 +728,21 @@ static bool optee_ffa_exchange_caps(struct ffa_device *ffa_dev,
>         return true;
>  }
>
> +static void notif_work_fn(struct work_struct *work)
> +{
> +       struct optee_ffa *optee_ffa = container_of(work, struct optee_ffa,
> +                                                  notif_work);
> +       struct optee *optee = container_of(optee_ffa, struct optee, ffa);
> +
> +       optee_do_bottom_half(optee->ctx);
> +}
> +
>  static void notif_callback(int notify_id, void *cb_data)
>  {
>         struct optee *optee = cb_data;
>
>         if (notify_id == optee->ffa.bottom_half_value)
> -               optee_do_bottom_half(optee->ctx);
> +               queue_work(optee->ffa.notif_wq, &optee->ffa.notif_work);
>         else
>                 optee_notif_send(optee, notify_id);
>  }
> @@ -817,9 +826,11 @@ static void optee_ffa_remove(struct ffa_device *ffa_dev)
>         struct optee *optee = ffa_dev_get_drvdata(ffa_dev);
>         u32 bottom_half_id = optee->ffa.bottom_half_value;
>
> -       if (bottom_half_id != U32_MAX)
> +       if (bottom_half_id != U32_MAX) {
>                 ffa_dev->ops->notifier_ops->notify_relinquish(ffa_dev,
>                                                               bottom_half_id);
> +               destroy_workqueue(optee->ffa.notif_wq);
> +       }
>         optee_remove_common(optee);
>
>         mutex_destroy(&optee->ffa.mutex);
> @@ -835,6 +846,13 @@ static int optee_ffa_async_notif_init(struct ffa_device *ffa_dev,
>         u32 notif_id = 0;
>         int rc;
>
> +       INIT_WORK(&optee->ffa.notif_work, notif_work_fn);
> +       optee->ffa.notif_wq = create_workqueue("optee_notification");
> +       if (!optee->ffa.notif_wq) {
> +               rc = -EINVAL;
> +               goto err;
> +       }
> +
>         while (true) {
>                 rc = ffa_dev->ops->notifier_ops->notify_request(ffa_dev,
>                                                                 is_per_vcpu,
> @@ -851,19 +869,24 @@ static int optee_ffa_async_notif_init(struct ffa_device *ffa_dev,
>                  * notifications in that case.
>                  */
>                 if (rc != -EACCES)
> -                       return rc;
> +                       goto err_wq;
>                 notif_id++;
>                 if (notif_id >= OPTEE_FFA_MAX_ASYNC_NOTIF_VALUE)
> -                       return rc;
> +                       goto err_wq;
>         }
>         optee->ffa.bottom_half_value = notif_id;
>
>         rc = enable_async_notif(optee);
> -       if (rc < 0) {
> -               ffa_dev->ops->notifier_ops->notify_relinquish(ffa_dev,
> -                                                             notif_id);
> -               optee->ffa.bottom_half_value = U32_MAX;
> -       }
> +       if (rc < 0)
> +               goto err_rel;
> +
> +       return 0;
> +err_rel:
> +       ffa_dev->ops->notifier_ops->notify_relinquish(ffa_dev, notif_id);
> +err_wq:
> +       destroy_workqueue(optee->ffa.notif_wq);
> +err:
> +       optee->ffa.bottom_half_value = U32_MAX;
>
>         return rc;
>  }
> diff --git a/drivers/tee/optee/optee_private.h b/drivers/tee/optee/optee_private.h
> index dc0f355ef72a..9526087f0e68 100644
> --- a/drivers/tee/optee/optee_private.h
> +++ b/drivers/tee/optee/optee_private.h
> @@ -165,6 +165,8 @@ struct optee_ffa {
>         /* Serializes access to @global_ids */
>         struct mutex mutex;
>         struct rhashtable global_ids;
> +       struct workqueue_struct *notif_wq;
> +       struct work_struct notif_work;
>  };
>
>  struct optee;
> --
> 2.43.0
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] optee: ffa: fix sleep in atomic context
  2025-06-02 12:04 [PATCH] optee: ffa: fix sleep in atomic context Jens Wiklander
  2025-06-02 13:09 ` Jens Wiklander
@ 2025-06-05 12:39 ` Sudeep Holla
  2025-06-11 12:36 ` Sumit Garg
  2025-06-11 12:37 ` Sumit Garg
  3 siblings, 0 replies; 7+ messages in thread
From: Sudeep Holla @ 2025-06-05 12:39 UTC (permalink / raw)
  To: Jens Wiklander; +Cc: linux-kernel, op-tee, Sumit Garg, Jerome Forissier

On Mon, Jun 02, 2025 at 02:04:35PM +0200, Jens Wiklander wrote:
> The OP-TEE driver registers the function notif_callback() for FF-A
> notifications. However, this function is called in an atomic context
> leading to errors like this when processing asynchronous notifications:
> 
>  | BUG: sleeping function called from invalid context at kernel/locking/mutex.c:258
>  | in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 9, name: kworker/0:0
>  | preempt_count: 1, expected: 0
>  | RCU nest depth: 0, expected: 0
>  | CPU: 0 UID: 0 PID: 9 Comm: kworker/0:0 Not tainted 6.14.0-00019-g657536ebe0aa #13
>  | Hardware name: linux,dummy-virt (DT)
>  | Workqueue: ffa_pcpu_irq_notification notif_pcpu_irq_work_fn
>  | Call trace:
>  |  show_stack+0x18/0x24 (C)
>  |  dump_stack_lvl+0x78/0x90
>  |  dump_stack+0x18/0x24
>  |  __might_resched+0x114/0x170
>  |  __might_sleep+0x48/0x98
>  |  mutex_lock+0x24/0x80
>  |  optee_get_msg_arg+0x7c/0x21c
>  |  simple_call_with_arg+0x50/0xc0
>  |  optee_do_bottom_half+0x14/0x20
>  |  notif_callback+0x3c/0x48
>  |  handle_notif_callbacks+0x9c/0xe0
>  |  notif_get_and_handle+0x40/0x88
>  |  generic_exec_single+0x80/0xc0
>  |  smp_call_function_single+0xfc/0x1a0
>  |  notif_pcpu_irq_work_fn+0x2c/0x38
>  |  process_one_work+0x14c/0x2b4
>  |  worker_thread+0x2e4/0x3e0
>  |  kthread+0x13c/0x210
>  |  ret_from_fork+0x10/0x20
> 
> Fix this by adding work queue to process the notification in a
> non-atomic context.

Tested-by: Sudeep Holla <sudeep.holla@arm.com>

-- 
Regards,
Sudeep

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] optee: ffa: fix sleep in atomic context
  2025-06-02 12:04 [PATCH] optee: ffa: fix sleep in atomic context Jens Wiklander
  2025-06-02 13:09 ` Jens Wiklander
  2025-06-05 12:39 ` Sudeep Holla
@ 2025-06-11 12:36 ` Sumit Garg
  2025-06-11 12:37 ` Sumit Garg
  3 siblings, 0 replies; 7+ messages in thread
From: Sumit Garg @ 2025-06-11 12:36 UTC (permalink / raw)
  To: Jens Wiklander; +Cc: linux-kernel, op-tee, Jerome Forissier, Sudeep Holla

On Mon, Jun 02, 2025 at 02:04:35PM +0200, Jens Wiklander wrote:
> The OP-TEE driver registers the function notif_callback() for FF-A
> notifications. However, this function is called in an atomic context
> leading to errors like this when processing asynchronous notifications:
> 
>  | BUG: sleeping function called from invalid context at kernel/locking/mutex.c:258
>  | in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 9, name: kworker/0:0
>  | preempt_count: 1, expected: 0
>  | RCU nest depth: 0, expected: 0
>  | CPU: 0 UID: 0 PID: 9 Comm: kworker/0:0 Not tainted 6.14.0-00019-g657536ebe0aa #13
>  | Hardware name: linux,dummy-virt (DT)
>  | Workqueue: ffa_pcpu_irq_notification notif_pcpu_irq_work_fn
>  | Call trace:
>  |  show_stack+0x18/0x24 (C)
>  |  dump_stack_lvl+0x78/0x90
>  |  dump_stack+0x18/0x24
>  |  __might_resched+0x114/0x170
>  |  __might_sleep+0x48/0x98
>  |  mutex_lock+0x24/0x80
>  |  optee_get_msg_arg+0x7c/0x21c
>  |  simple_call_with_arg+0x50/0xc0
>  |  optee_do_bottom_half+0x14/0x20
>  |  notif_callback+0x3c/0x48
>  |  handle_notif_callbacks+0x9c/0xe0
>  |  notif_get_and_handle+0x40/0x88
>  |  generic_exec_single+0x80/0xc0
>  |  smp_call_function_single+0xfc/0x1a0
>  |  notif_pcpu_irq_work_fn+0x2c/0x38
>  |  process_one_work+0x14c/0x2b4
>  |  worker_thread+0x2e4/0x3e0
>  |  kthread+0x13c/0x210
>  |  ret_from_fork+0x10/0x20
> 
> Fix this by adding work queue to process the notification in a
> non-atomic context.
> 
> Fixes: d0476a59de06 ("optee: ffa_abi: add asynchronous notifications")
> Signed-off-by: Jens Wiklander <jens.wiklander@linaro.org>
> ---
>  drivers/tee/optee/ffa_abi.c       | 41 ++++++++++++++++++++++++-------
>  drivers/tee/optee/optee_private.h |  2 ++
>  2 files changed, 34 insertions(+), 9 deletions(-)

Sounds like a reasonable fix to me, FWIW:

Reviewed-by: Sumit Garg <sumit.garg@oss.qualcomm.com>

-Sumit

> 
> diff --git a/drivers/tee/optee/ffa_abi.c b/drivers/tee/optee/ffa_abi.c
> index f3af5666bb11..f9ef7d94cebd 100644
> --- a/drivers/tee/optee/ffa_abi.c
> +++ b/drivers/tee/optee/ffa_abi.c
> @@ -728,12 +728,21 @@ static bool optee_ffa_exchange_caps(struct ffa_device *ffa_dev,
>  	return true;
>  }
>  
> +static void notif_work_fn(struct work_struct *work)
> +{
> +	struct optee_ffa *optee_ffa = container_of(work, struct optee_ffa,
> +						   notif_work);
> +	struct optee *optee = container_of(optee_ffa, struct optee, ffa);
> +
> +	optee_do_bottom_half(optee->ctx);
> +}
> +
>  static void notif_callback(int notify_id, void *cb_data)
>  {
>  	struct optee *optee = cb_data;
>  
>  	if (notify_id == optee->ffa.bottom_half_value)
> -		optee_do_bottom_half(optee->ctx);
> +		queue_work(optee->ffa.notif_wq, &optee->ffa.notif_work);
>  	else
>  		optee_notif_send(optee, notify_id);
>  }
> @@ -817,9 +826,11 @@ static void optee_ffa_remove(struct ffa_device *ffa_dev)
>  	struct optee *optee = ffa_dev_get_drvdata(ffa_dev);
>  	u32 bottom_half_id = optee->ffa.bottom_half_value;
>  
> -	if (bottom_half_id != U32_MAX)
> +	if (bottom_half_id != U32_MAX) {
>  		ffa_dev->ops->notifier_ops->notify_relinquish(ffa_dev,
>  							      bottom_half_id);
> +		destroy_workqueue(optee->ffa.notif_wq);
> +	}
>  	optee_remove_common(optee);
>  
>  	mutex_destroy(&optee->ffa.mutex);
> @@ -835,6 +846,13 @@ static int optee_ffa_async_notif_init(struct ffa_device *ffa_dev,
>  	u32 notif_id = 0;
>  	int rc;
>  
> +	INIT_WORK(&optee->ffa.notif_work, notif_work_fn);
> +	optee->ffa.notif_wq = create_workqueue("optee_notification");
> +	if (!optee->ffa.notif_wq) {
> +		rc = -EINVAL;
> +		goto err;
> +	}
> +
>  	while (true) {
>  		rc = ffa_dev->ops->notifier_ops->notify_request(ffa_dev,
>  								is_per_vcpu,
> @@ -851,19 +869,24 @@ static int optee_ffa_async_notif_init(struct ffa_device *ffa_dev,
>  		 * notifications in that case.
>  		 */
>  		if (rc != -EACCES)
> -			return rc;
> +			goto err_wq;
>  		notif_id++;
>  		if (notif_id >= OPTEE_FFA_MAX_ASYNC_NOTIF_VALUE)
> -			return rc;
> +			goto err_wq;
>  	}
>  	optee->ffa.bottom_half_value = notif_id;
>  
>  	rc = enable_async_notif(optee);
> -	if (rc < 0) {
> -		ffa_dev->ops->notifier_ops->notify_relinquish(ffa_dev,
> -							      notif_id);
> -		optee->ffa.bottom_half_value = U32_MAX;
> -	}
> +	if (rc < 0)
> +		goto err_rel;
> +
> +	return 0;
> +err_rel:
> +	ffa_dev->ops->notifier_ops->notify_relinquish(ffa_dev, notif_id);
> +err_wq:
> +	destroy_workqueue(optee->ffa.notif_wq);
> +err:
> +	optee->ffa.bottom_half_value = U32_MAX;
>  
>  	return rc;
>  }
> diff --git a/drivers/tee/optee/optee_private.h b/drivers/tee/optee/optee_private.h
> index dc0f355ef72a..9526087f0e68 100644
> --- a/drivers/tee/optee/optee_private.h
> +++ b/drivers/tee/optee/optee_private.h
> @@ -165,6 +165,8 @@ struct optee_ffa {
>  	/* Serializes access to @global_ids */
>  	struct mutex mutex;
>  	struct rhashtable global_ids;
> +	struct workqueue_struct *notif_wq;
> +	struct work_struct notif_work;
>  };
>  
>  struct optee;
> -- 
> 2.43.0
> 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] optee: ffa: fix sleep in atomic context
  2025-06-02 12:04 [PATCH] optee: ffa: fix sleep in atomic context Jens Wiklander
                   ` (2 preceding siblings ...)
  2025-06-11 12:36 ` Sumit Garg
@ 2025-06-11 12:37 ` Sumit Garg
  2025-06-11 12:45   ` Sudeep Holla
  3 siblings, 1 reply; 7+ messages in thread
From: Sumit Garg @ 2025-06-11 12:37 UTC (permalink / raw)
  To: Jens Wiklander; +Cc: linux-kernel, op-tee, Jerome Forissier, Sudeep Holla

On Mon, Jun 02, 2025 at 02:04:35PM +0200, Jens Wiklander wrote:
> The OP-TEE driver registers the function notif_callback() for FF-A
> notifications. However, this function is called in an atomic context
> leading to errors like this when processing asynchronous notifications:
> 
>  | BUG: sleeping function called from invalid context at kernel/locking/mutex.c:258
>  | in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 9, name: kworker/0:0
>  | preempt_count: 1, expected: 0
>  | RCU nest depth: 0, expected: 0
>  | CPU: 0 UID: 0 PID: 9 Comm: kworker/0:0 Not tainted 6.14.0-00019-g657536ebe0aa #13
>  | Hardware name: linux,dummy-virt (DT)
>  | Workqueue: ffa_pcpu_irq_notification notif_pcpu_irq_work_fn
>  | Call trace:
>  |  show_stack+0x18/0x24 (C)
>  |  dump_stack_lvl+0x78/0x90
>  |  dump_stack+0x18/0x24
>  |  __might_resched+0x114/0x170
>  |  __might_sleep+0x48/0x98
>  |  mutex_lock+0x24/0x80
>  |  optee_get_msg_arg+0x7c/0x21c
>  |  simple_call_with_arg+0x50/0xc0
>  |  optee_do_bottom_half+0x14/0x20
>  |  notif_callback+0x3c/0x48
>  |  handle_notif_callbacks+0x9c/0xe0
>  |  notif_get_and_handle+0x40/0x88
>  |  generic_exec_single+0x80/0xc0
>  |  smp_call_function_single+0xfc/0x1a0
>  |  notif_pcpu_irq_work_fn+0x2c/0x38
>  |  process_one_work+0x14c/0x2b4
>  |  worker_thread+0x2e4/0x3e0
>  |  kthread+0x13c/0x210
>  |  ret_from_fork+0x10/0x20
> 
> Fix this by adding work queue to process the notification in a
> non-atomic context.
> 
> Fixes: d0476a59de06 ("optee: ffa_abi: add asynchronous notifications")
> Signed-off-by: Jens Wiklander <jens.wiklander@linaro.org>

Forgot to mention, let's explicitly CC for stable kernel backport here.

-Sumit

> ---
>  drivers/tee/optee/ffa_abi.c       | 41 ++++++++++++++++++++++++-------
>  drivers/tee/optee/optee_private.h |  2 ++
>  2 files changed, 34 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/tee/optee/ffa_abi.c b/drivers/tee/optee/ffa_abi.c
> index f3af5666bb11..f9ef7d94cebd 100644
> --- a/drivers/tee/optee/ffa_abi.c
> +++ b/drivers/tee/optee/ffa_abi.c
> @@ -728,12 +728,21 @@ static bool optee_ffa_exchange_caps(struct ffa_device *ffa_dev,
>  	return true;
>  }
>  
> +static void notif_work_fn(struct work_struct *work)
> +{
> +	struct optee_ffa *optee_ffa = container_of(work, struct optee_ffa,
> +						   notif_work);
> +	struct optee *optee = container_of(optee_ffa, struct optee, ffa);
> +
> +	optee_do_bottom_half(optee->ctx);
> +}
> +
>  static void notif_callback(int notify_id, void *cb_data)
>  {
>  	struct optee *optee = cb_data;
>  
>  	if (notify_id == optee->ffa.bottom_half_value)
> -		optee_do_bottom_half(optee->ctx);
> +		queue_work(optee->ffa.notif_wq, &optee->ffa.notif_work);
>  	else
>  		optee_notif_send(optee, notify_id);
>  }
> @@ -817,9 +826,11 @@ static void optee_ffa_remove(struct ffa_device *ffa_dev)
>  	struct optee *optee = ffa_dev_get_drvdata(ffa_dev);
>  	u32 bottom_half_id = optee->ffa.bottom_half_value;
>  
> -	if (bottom_half_id != U32_MAX)
> +	if (bottom_half_id != U32_MAX) {
>  		ffa_dev->ops->notifier_ops->notify_relinquish(ffa_dev,
>  							      bottom_half_id);
> +		destroy_workqueue(optee->ffa.notif_wq);
> +	}
>  	optee_remove_common(optee);
>  
>  	mutex_destroy(&optee->ffa.mutex);
> @@ -835,6 +846,13 @@ static int optee_ffa_async_notif_init(struct ffa_device *ffa_dev,
>  	u32 notif_id = 0;
>  	int rc;
>  
> +	INIT_WORK(&optee->ffa.notif_work, notif_work_fn);
> +	optee->ffa.notif_wq = create_workqueue("optee_notification");
> +	if (!optee->ffa.notif_wq) {
> +		rc = -EINVAL;
> +		goto err;
> +	}
> +
>  	while (true) {
>  		rc = ffa_dev->ops->notifier_ops->notify_request(ffa_dev,
>  								is_per_vcpu,
> @@ -851,19 +869,24 @@ static int optee_ffa_async_notif_init(struct ffa_device *ffa_dev,
>  		 * notifications in that case.
>  		 */
>  		if (rc != -EACCES)
> -			return rc;
> +			goto err_wq;
>  		notif_id++;
>  		if (notif_id >= OPTEE_FFA_MAX_ASYNC_NOTIF_VALUE)
> -			return rc;
> +			goto err_wq;
>  	}
>  	optee->ffa.bottom_half_value = notif_id;
>  
>  	rc = enable_async_notif(optee);
> -	if (rc < 0) {
> -		ffa_dev->ops->notifier_ops->notify_relinquish(ffa_dev,
> -							      notif_id);
> -		optee->ffa.bottom_half_value = U32_MAX;
> -	}
> +	if (rc < 0)
> +		goto err_rel;
> +
> +	return 0;
> +err_rel:
> +	ffa_dev->ops->notifier_ops->notify_relinquish(ffa_dev, notif_id);
> +err_wq:
> +	destroy_workqueue(optee->ffa.notif_wq);
> +err:
> +	optee->ffa.bottom_half_value = U32_MAX;
>  
>  	return rc;
>  }
> diff --git a/drivers/tee/optee/optee_private.h b/drivers/tee/optee/optee_private.h
> index dc0f355ef72a..9526087f0e68 100644
> --- a/drivers/tee/optee/optee_private.h
> +++ b/drivers/tee/optee/optee_private.h
> @@ -165,6 +165,8 @@ struct optee_ffa {
>  	/* Serializes access to @global_ids */
>  	struct mutex mutex;
>  	struct rhashtable global_ids;
> +	struct workqueue_struct *notif_wq;
> +	struct work_struct notif_work;
>  };
>  
>  struct optee;
> -- 
> 2.43.0
> 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] optee: ffa: fix sleep in atomic context
  2025-06-11 12:37 ` Sumit Garg
@ 2025-06-11 12:45   ` Sudeep Holla
  2025-06-12  9:58     ` Jens Wiklander
  0 siblings, 1 reply; 7+ messages in thread
From: Sudeep Holla @ 2025-06-11 12:45 UTC (permalink / raw)
  To: Sumit Garg
  Cc: Jens Wiklander, linux-kernel, op-tee, Jerome Forissier,
	Sudeep Holla

On Wed, Jun 11, 2025 at 06:07:37PM +0530, Sumit Garg wrote:
> On Mon, Jun 02, 2025 at 02:04:35PM +0200, Jens Wiklander wrote:
> > The OP-TEE driver registers the function notif_callback() for FF-A
> > notifications. However, this function is called in an atomic context
> > leading to errors like this when processing asynchronous notifications:
> > 
> >  | BUG: sleeping function called from invalid context at kernel/locking/mutex.c:258
> >  | in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 9, name: kworker/0:0
> >  | preempt_count: 1, expected: 0
> >  | RCU nest depth: 0, expected: 0
> >  | CPU: 0 UID: 0 PID: 9 Comm: kworker/0:0 Not tainted 6.14.0-00019-g657536ebe0aa #13
> >  | Hardware name: linux,dummy-virt (DT)
> >  | Workqueue: ffa_pcpu_irq_notification notif_pcpu_irq_work_fn
> >  | Call trace:
> >  |  show_stack+0x18/0x24 (C)
> >  |  dump_stack_lvl+0x78/0x90
> >  |  dump_stack+0x18/0x24
> >  |  __might_resched+0x114/0x170
> >  |  __might_sleep+0x48/0x98
> >  |  mutex_lock+0x24/0x80
> >  |  optee_get_msg_arg+0x7c/0x21c
> >  |  simple_call_with_arg+0x50/0xc0
> >  |  optee_do_bottom_half+0x14/0x20
> >  |  notif_callback+0x3c/0x48
> >  |  handle_notif_callbacks+0x9c/0xe0
> >  |  notif_get_and_handle+0x40/0x88
> >  |  generic_exec_single+0x80/0xc0
> >  |  smp_call_function_single+0xfc/0x1a0
> >  |  notif_pcpu_irq_work_fn+0x2c/0x38
> >  |  process_one_work+0x14c/0x2b4
> >  |  worker_thread+0x2e4/0x3e0
> >  |  kthread+0x13c/0x210
> >  |  ret_from_fork+0x10/0x20
> > 
> > Fix this by adding work queue to process the notification in a
> > non-atomic context.
> > 
> > Fixes: d0476a59de06 ("optee: ffa_abi: add asynchronous notifications")
> > Signed-off-by: Jens Wiklander <jens.wiklander@linaro.org>
> 
> Forgot to mention, let's explicitly CC for stable kernel backport here.

Makes sense.

Jens,

Just FYI:

Here is the FF-A fix PR to Arnd if you need any reference:
https://lore.kernel.org/all/20250609105207.1185570-1-sudeep.holla@arm.com

I haven't tagged it for stable, assuming that commits with a Fixes: tag get
selected anyway; also, it is not trivial to apply. I do have the backports
ready to send once it is merged upstream.

-- 
Regards,
Sudeep

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] optee: ffa: fix sleep in atomic context
  2025-06-11 12:45   ` Sudeep Holla
@ 2025-06-12  9:58     ` Jens Wiklander
  0 siblings, 0 replies; 7+ messages in thread
From: Jens Wiklander @ 2025-06-12  9:58 UTC (permalink / raw)
  To: Sudeep Holla; +Cc: Sumit Garg, linux-kernel, op-tee, Jerome Forissier

On Wed, Jun 11, 2025 at 2:45 PM Sudeep Holla <sudeep.holla@arm.com> wrote:
>
> On Wed, Jun 11, 2025 at 06:07:37PM +0530, Sumit Garg wrote:
> > On Mon, Jun 02, 2025 at 02:04:35PM +0200, Jens Wiklander wrote:
> > > The OP-TEE driver registers the function notif_callback() for FF-A
> > > notifications. However, this function is called in an atomic context
> > > leading to errors like this when processing asynchronous notifications:
> > >
> > >  | BUG: sleeping function called from invalid context at kernel/locking/mutex.c:258
> > >  | in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 9, name: kworker/0:0
> > >  | preempt_count: 1, expected: 0
> > >  | RCU nest depth: 0, expected: 0
> > >  | CPU: 0 UID: 0 PID: 9 Comm: kworker/0:0 Not tainted 6.14.0-00019-g657536ebe0aa #13
> > >  | Hardware name: linux,dummy-virt (DT)
> > >  | Workqueue: ffa_pcpu_irq_notification notif_pcpu_irq_work_fn
> > >  | Call trace:
> > >  |  show_stack+0x18/0x24 (C)
> > >  |  dump_stack_lvl+0x78/0x90
> > >  |  dump_stack+0x18/0x24
> > >  |  __might_resched+0x114/0x170
> > >  |  __might_sleep+0x48/0x98
> > >  |  mutex_lock+0x24/0x80
> > >  |  optee_get_msg_arg+0x7c/0x21c
> > >  |  simple_call_with_arg+0x50/0xc0
> > >  |  optee_do_bottom_half+0x14/0x20
> > >  |  notif_callback+0x3c/0x48
> > >  |  handle_notif_callbacks+0x9c/0xe0
> > >  |  notif_get_and_handle+0x40/0x88
> > >  |  generic_exec_single+0x80/0xc0
> > >  |  smp_call_function_single+0xfc/0x1a0
> > >  |  notif_pcpu_irq_work_fn+0x2c/0x38
> > >  |  process_one_work+0x14c/0x2b4
> > >  |  worker_thread+0x2e4/0x3e0
> > >  |  kthread+0x13c/0x210
> > >  |  ret_from_fork+0x10/0x20
> > >
> > > Fix this by adding work queue to process the notification in a
> > > non-atomic context.
> > >
> > > Fixes: d0476a59de06 ("optee: ffa_abi: add asynchronous notifications")
> > > Signed-off-by: Jens Wiklander <jens.wiklander@linaro.org>
> >
> > Forgot to mention, let's explicitly CC for stable kernel backport here.
>
> Makes sense.

Sure, I'll fix up the commit message before sending the PR.

>
> Jens,
>
> Just FYI:
>
> Here is the FF-A fix PR to Arnd if you need any reference:
> https://lore.kernel.org/all/20250609105207.1185570-1-sudeep.holla@arm.com

Good

Thanks,
Jens

>
> I haven't tagged it for stable assuming Fixes: tag ones get selected and
> also it is not trivial to apply. I do have the backports also ready to
> send once merged upstream.
>
> --
> Regards,
> Sudeep

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2025-06-12  9:58 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-06-02 12:04 [PATCH] optee: ffa: fix sleep in atomic context Jens Wiklander
2025-06-02 13:09 ` Jens Wiklander
2025-06-05 12:39 ` Sudeep Holla
2025-06-11 12:36 ` Sumit Garg
2025-06-11 12:37 ` Sumit Garg
2025-06-11 12:45   ` Sudeep Holla
2025-06-12  9:58     ` Jens Wiklander

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).