[PATCH v3 0/2] NVKM GSP RPC message handling policy

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH v3 0/2] NVKM GSP RPC message handling policy
@ 2025-02-27  1:35 Zhi Wang
  2025-02-27  1:35 ` [PATCH v3 1/2] drm/nouveau/nvkm: factor out current GSP RPC command policies Zhi Wang
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Zhi Wang @ 2025-02-27  1:35 UTC (permalink / raw)
  To: nouveau
  Cc: airlied, daniel, dakr, lyude, bskeggs, acurrid, cjia, smitra,
	ankita, aniketa, kwankhede, targupta, zhiw, zhiwang

Hi folks:

Here are the v3 version of NVKM GSP RPC message handling policy which
solves the issue reported by Ben. More details can be found in v1 [1].

v3:

- Fix a hidden use-after-free caught by KFENCE in PATCH 1,
  r535_gsp_rpc_send(). (Ben)

Zhi Wang (2):
  drm/nouveau/nvkm: factor out current GSP RPC command policies
  drm/nouveau/nvkm: introduce new GSP reply policy
    NVKM_GSP_RPC_REPLY_POLL

 Documentation/gpu/nouveau.rst                 |  3 +
 .../gpu/drm/nouveau/include/nvkm/subdev/gsp.h | 38 +++++++--
 .../gpu/drm/nouveau/nvkm/subdev/bar/r535.c    |  2 +-
 .../gpu/drm/nouveau/nvkm/subdev/gsp/r535.c    | 78 ++++++++++---------
 .../drm/nouveau/nvkm/subdev/instmem/r535.c    |  2 +-
 5 files changed, 79 insertions(+), 44 deletions(-)

-- 
2.43.5


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH v3 1/2] drm/nouveau/nvkm: factor out current GSP RPC command policies
  2025-02-27  1:35 [PATCH v3 0/2] NVKM GSP RPC message handling policy Zhi Wang
@ 2025-02-27  1:35 ` Zhi Wang
  2025-02-27  1:46   ` Timur Tabi
  2025-03-14  1:39   ` Alexandre Courbot
  2025-02-27  1:35 ` [PATCH v3 2/2] drm/nouveau/nvkm: introduce new GSP reply policy NVKM_GSP_RPC_REPLY_POLL Zhi Wang
  2025-03-09 13:00 ` [PATCH v3 0/2] NVKM GSP RPC message handling policy Danilo Krummrich
  2 siblings, 2 replies; 7+ messages in thread
From: Zhi Wang @ 2025-02-27  1:35 UTC (permalink / raw)
  To: nouveau
  Cc: airlied, daniel, dakr, lyude, bskeggs, acurrid, cjia, smitra,
	ankita, aniketa, kwankhede, targupta, zhiw, zhiwang,
	Alexandre Courbot

There can be multiple cases of handling the GSP RPC messages, which are
the reply of GSP RPC commands according to the requirement of the
callers and the nature of the GSP RPC commands.

The current supported reply policies are "callers don't care" and "receive
the entire message" according to the requirement of the callers. To
introduce a new policy, factor out the current RPC command reply polices.
Also, centralize the handling of the reply in a single function.

Factor out NVKM_GSP_RPC_REPLY_NOWAIT as "callers don't care" and
NVKM_GSP_RPC_REPLY_RECV as "receive the entire message". Introduce a
kernel doc to document the policies. Factor out
r535_gsp_rpc_handle_reply().

No functional change is intended for small GSP RPC commands. For large GSP
commands, the caller decides the policy of how to handle the returned GSP
RPC message.

Cc: Ben Skeggs <bskeggs@nvidia.com>
Cc: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Zhi Wang <zhiw@nvidia.com>
---
 Documentation/gpu/nouveau.rst                 |  3 +
 .../gpu/drm/nouveau/include/nvkm/subdev/gsp.h | 34 +++++++--
 .../gpu/drm/nouveau/nvkm/subdev/bar/r535.c    |  2 +-
 .../gpu/drm/nouveau/nvkm/subdev/gsp/r535.c    | 75 ++++++++++---------
 .../drm/nouveau/nvkm/subdev/instmem/r535.c    |  2 +-
 5 files changed, 72 insertions(+), 44 deletions(-)

diff --git a/Documentation/gpu/nouveau.rst b/Documentation/gpu/nouveau.rst
index 0f34131ccc27..b8c801e0068c 100644
--- a/Documentation/gpu/nouveau.rst
+++ b/Documentation/gpu/nouveau.rst
@@ -27,3 +27,6 @@ GSP Support
 
 .. kernel-doc:: drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
    :doc: GSP message queue element
+
+.. kernel-doc:: drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h
+   :doc: GSP message handling policy
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h
index 746e126c3ecf..e5fe44589bbd 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h
@@ -31,6 +31,25 @@ typedef int (*nvkm_gsp_msg_ntfy_func)(void *priv, u32 fn, void *repv, u32 repc);
 struct nvkm_gsp_event;
 typedef void (*nvkm_gsp_event_func)(struct nvkm_gsp_event *, void *repv, u32 repc);
 
+/**
+ * DOC: GSP message handling policy
+ *
+ * When sending a GSP RPC command, there can be multiple cases of handling
+ * the GSP RPC messages, which are the reply of GSP RPC commands, according
+ * to the requirement of the callers and the nature of the GSP RPC commands.
+ *
+ * NVKM_GSP_RPC_REPLY_NOWAIT - If specified, immediately return to the
+ * caller after the GSP RPC command is issued.
+ *
+ * NVKM_GSP_RPC_REPLY_RECV - If specified, wait and receive the entire GSP
+ * RPC message after the GSP RPC command is issued.
+ *
+ */
+enum nvkm_gsp_rpc_reply_policy {
+	NVKM_GSP_RPC_REPLY_NOWAIT = 0,
+	NVKM_GSP_RPC_REPLY_RECV,
+};
+
 struct nvkm_gsp {
 	const struct nvkm_gsp_func *func;
 	struct nvkm_subdev subdev;
@@ -188,7 +207,8 @@ struct nvkm_gsp {
 
 	const struct nvkm_gsp_rm {
 		void *(*rpc_get)(struct nvkm_gsp *, u32 fn, u32 argc);
-		void *(*rpc_push)(struct nvkm_gsp *, void *argv, bool wait, u32 repc);
+		void *(*rpc_push)(struct nvkm_gsp *gsp, void *argv,
+				  enum nvkm_gsp_rpc_reply_policy policy, u32 repc);
 		void (*rpc_done)(struct nvkm_gsp *gsp, void *repv);
 
 		void *(*rm_ctrl_get)(struct nvkm_gsp_object *, u32 cmd, u32 argc);
@@ -255,9 +275,10 @@ nvkm_gsp_rpc_get(struct nvkm_gsp *gsp, u32 fn, u32 argc)
 }
 
 static inline void *
-nvkm_gsp_rpc_push(struct nvkm_gsp *gsp, void *argv, bool wait, u32 repc)
+nvkm_gsp_rpc_push(struct nvkm_gsp *gsp, void *argv,
+		  enum nvkm_gsp_rpc_reply_policy policy, u32 repc)
 {
-	return gsp->rm->rpc_push(gsp, argv, wait, repc);
+	return gsp->rm->rpc_push(gsp, argv, policy, repc);
 }
 
 static inline void *
@@ -268,13 +289,14 @@ nvkm_gsp_rpc_rd(struct nvkm_gsp *gsp, u32 fn, u32 argc)
 	if (IS_ERR_OR_NULL(argv))
 		return argv;
 
-	return nvkm_gsp_rpc_push(gsp, argv, true, argc);
+	return nvkm_gsp_rpc_push(gsp, argv, NVKM_GSP_RPC_REPLY_RECV, argc);
 }
 
 static inline int
-nvkm_gsp_rpc_wr(struct nvkm_gsp *gsp, void *argv, bool wait)
+nvkm_gsp_rpc_wr(struct nvkm_gsp *gsp, void *argv,
+		enum nvkm_gsp_rpc_reply_policy policy)
 {
-	void *repv = nvkm_gsp_rpc_push(gsp, argv, wait, 0);
+	void *repv = nvkm_gsp_rpc_push(gsp, argv, policy, 0);
 
 	if (IS_ERR(repv))
 		return PTR_ERR(repv);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/r535.c
index 3a30bea30e36..90186f98065c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/r535.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/r535.c
@@ -56,7 +56,7 @@ r535_bar_bar2_update_pde(struct nvkm_gsp *gsp, u64 addr)
 	rpc->info.entryValue = addr ? ((addr >> 4) | 2) : 0; /* PD3 entry format! */
 	rpc->info.entryLevelShift = 47; //XXX: probably fetch this from mmu!
 
-	return nvkm_gsp_rpc_wr(gsp, rpc, true);
+	return nvkm_gsp_rpc_wr(gsp, rpc, NVKM_GSP_RPC_REPLY_RECV);
 }
 
 static void
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
index db2602e88006..f73dcc3e1c0d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
@@ -585,13 +585,34 @@ r535_gsp_rpc_poll(struct nvkm_gsp *gsp, u32 fn)
 }
 
 static void *
-r535_gsp_rpc_send(struct nvkm_gsp *gsp, void *payload, bool wait,
-		  u32 gsp_rpc_len)
+r535_gsp_rpc_handle_reply(struct nvkm_gsp *gsp, u32 fn,
+			  enum nvkm_gsp_rpc_reply_policy policy,
+			  u32 gsp_rpc_len)
+{
+	struct nvfw_gsp_rpc *reply;
+	void *repv = NULL;
+
+	switch (policy) {
+	case NVKM_GSP_RPC_REPLY_NOWAIT:
+		break;
+	case NVKM_GSP_RPC_REPLY_RECV:
+		reply = r535_gsp_msg_recv(gsp, fn, gsp_rpc_len);
+		if (!IS_ERR_OR_NULL(reply))
+			repv = reply->data;
+		else
+			repv = reply;
+		break;
+	}
+
+	return repv;
+}
+
+static void *
+r535_gsp_rpc_send(struct nvkm_gsp *gsp, void *payload,
+		  enum nvkm_gsp_rpc_reply_policy policy, u32 gsp_rpc_len)
 {
 	struct nvfw_gsp_rpc *rpc = to_gsp_hdr(payload, rpc);
-	struct nvfw_gsp_rpc *msg;
 	u32 fn = rpc->function;
-	void *repv = NULL;
 	int ret;
 
 	if (gsp->subdev.debug >= NV_DBG_TRACE) {
@@ -605,15 +626,7 @@ r535_gsp_rpc_send(struct nvkm_gsp *gsp, void *payload, bool wait,
 	if (ret)
 		return ERR_PTR(ret);
 
-	if (wait) {
-		msg = r535_gsp_msg_recv(gsp, fn, gsp_rpc_len);
-		if (!IS_ERR_OR_NULL(msg))
-			repv = msg->data;
-		else
-			repv = msg;
-	}
-
-	return repv;
+	return r535_gsp_rpc_handle_reply(gsp, fn, policy, gsp_rpc_len);
 }
 
 static void
@@ -797,7 +810,7 @@ r535_gsp_rpc_rm_free(struct nvkm_gsp_object *object)
 	rpc->params.hRoot = client->object.handle;
 	rpc->params.hObjectParent = 0;
 	rpc->params.hObjectOld = object->handle;
-	return nvkm_gsp_rpc_wr(gsp, rpc, true);
+	return nvkm_gsp_rpc_wr(gsp, rpc, NVKM_GSP_RPC_REPLY_RECV);
 }
 
 static void
@@ -815,7 +828,7 @@ r535_gsp_rpc_rm_alloc_push(struct nvkm_gsp_object *object, void *params)
 	struct nvkm_gsp *gsp = object->client->gsp;
 	void *ret = NULL;
 
-	rpc = nvkm_gsp_rpc_push(gsp, rpc, true, sizeof(*rpc));
+	rpc = nvkm_gsp_rpc_push(gsp, rpc, NVKM_GSP_RPC_REPLY_RECV, sizeof(*rpc));
 	if (IS_ERR_OR_NULL(rpc))
 		return rpc;
 
@@ -876,7 +889,7 @@ r535_gsp_rpc_rm_ctrl_push(struct nvkm_gsp_object *object, void **params, u32 rep
 	struct nvkm_gsp *gsp = object->client->gsp;
 	int ret = 0;
 
-	rpc = nvkm_gsp_rpc_push(gsp, rpc, true, repc);
+	rpc = nvkm_gsp_rpc_push(gsp, rpc, NVKM_GSP_RPC_REPLY_RECV, repc);
 	if (IS_ERR_OR_NULL(rpc)) {
 		*params = NULL;
 		return PTR_ERR(rpc);
@@ -948,8 +961,8 @@ r535_gsp_rpc_get(struct nvkm_gsp *gsp, u32 fn, u32 payload_size)
 }
 
 static void *
-r535_gsp_rpc_push(struct nvkm_gsp *gsp, void *payload, bool wait,
-		  u32 gsp_rpc_len)
+r535_gsp_rpc_push(struct nvkm_gsp *gsp, void *payload,
+		  enum nvkm_gsp_rpc_reply_policy policy, u32 gsp_rpc_len)
 {
 	struct nvfw_gsp_rpc *rpc = to_gsp_hdr(payload, rpc);
 	struct r535_gsp_msg *msg = to_gsp_hdr(rpc, msg);
@@ -967,7 +980,7 @@ r535_gsp_rpc_push(struct nvkm_gsp *gsp, void *payload, bool wait,
 		rpc->length = sizeof(*rpc) + max_payload_size;
 		msg->checksum = rpc->length;
 
-		repv = r535_gsp_rpc_send(gsp, payload, false, 0);
+		repv = r535_gsp_rpc_send(gsp, payload, NVKM_GSP_RPC_REPLY_NOWAIT, 0);
 		if (IS_ERR(repv))
 			goto done;
 
@@ -988,7 +1001,7 @@ r535_gsp_rpc_push(struct nvkm_gsp *gsp, void *payload, bool wait,
 
 			memcpy(next, payload, size);
 
-			repv = r535_gsp_rpc_send(gsp, next, false, 0);
+			repv = r535_gsp_rpc_send(gsp, next, NVKM_GSP_RPC_REPLY_NOWAIT, 0);
 			if (IS_ERR(repv))
 				goto done;
 
@@ -997,20 +1010,10 @@ r535_gsp_rpc_push(struct nvkm_gsp *gsp, void *payload, bool wait,
 		}
 
 		/* Wait for reply. */
-		rpc = r535_gsp_msg_recv(gsp, fn, payload_size +
-					sizeof(*rpc));
-		if (!IS_ERR_OR_NULL(rpc)) {
-			if (wait) {
-				repv = rpc->data;
-			} else {
-				nvkm_gsp_rpc_done(gsp, rpc);
-				repv = NULL;
-			}
-		} else {
-			repv = wait ? rpc : NULL;
-		}
+		repv = r535_gsp_rpc_handle_reply(gsp, fn, policy, payload_size +
+						 sizeof(*rpc));
 	} else {
-		repv = r535_gsp_rpc_send(gsp, payload, wait, gsp_rpc_len);
+		repv = r535_gsp_rpc_send(gsp, payload, policy, gsp_rpc_len);
 	}
 
 done:
@@ -1327,7 +1330,7 @@ r535_gsp_rpc_unloading_guest_driver(struct nvkm_gsp *gsp, bool suspend)
 		rpc->newLevel = NV2080_CTRL_GPU_SET_POWER_STATE_GPU_LEVEL_0;
 	}
 
-	return nvkm_gsp_rpc_wr(gsp, rpc, true);
+	return nvkm_gsp_rpc_wr(gsp, rpc, NVKM_GSP_RPC_REPLY_RECV);
 }
 
 enum registry_type {
@@ -1684,7 +1687,7 @@ r535_gsp_rpc_set_registry(struct nvkm_gsp *gsp)
 
 	build_registry(gsp, rpc);
 
-	return nvkm_gsp_rpc_wr(gsp, rpc, false);
+	return nvkm_gsp_rpc_wr(gsp, rpc, NVKM_GSP_RPC_REPLY_NOWAIT);
 
 fail:
 	clean_registry(gsp);
@@ -1893,7 +1896,7 @@ r535_gsp_rpc_set_system_info(struct nvkm_gsp *gsp)
 	info->pciConfigMirrorSize = 0x001000;
 	r535_gsp_acpi_info(gsp, &info->acpiMethodData);
 
-	return nvkm_gsp_rpc_wr(gsp, info, false);
+	return nvkm_gsp_rpc_wr(gsp, info, NVKM_GSP_RPC_REPLY_NOWAIT);
 }
 
 static int
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/r535.c
index 5f3c9c02a4c0..2789efe9c100 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/r535.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/r535.c
@@ -105,7 +105,7 @@ fbsr_memlist(struct nvkm_gsp_device *device, u32 handle, enum nvkm_memory_target
 			rpc->pteDesc.pte_pde[i].pte = (phys >> 12) + i;
 	}
 
-	ret = nvkm_gsp_rpc_wr(gsp, rpc, true);
+	ret = nvkm_gsp_rpc_wr(gsp, rpc, NVKM_GSP_RPC_REPLY_RECV);
 	if (ret)
 		return ret;
 
-- 
2.43.5


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH v3 1/2] drm/nouveau/nvkm: factor out current GSP RPC command policies
  2025-02-27  1:35 ` [PATCH v3 1/2] drm/nouveau/nvkm: factor out current GSP RPC command policies Zhi Wang
@ 2025-02-27  1:46   ` Timur Tabi
  2025-03-14  1:39   ` Alexandre Courbot
  1 sibling, 0 replies; 7+ messages in thread
From: Timur Tabi @ 2025-02-27  1:46 UTC (permalink / raw)
  To: nouveau@lists.freedesktop.org, Zhi Wang
  Cc: Alexandre Courbot, zhiwang@kernel.org, Surath Mitra, Andy Currid,
	lyude@redhat.com, Kirti Wankhede, airlied@gmail.com,
	Tarun Gupta (SW-GPU), dakr@kernel.org, Ankit Agrawal,
	Aniket Agashe, daniel@ffwll.ch, Ben Skeggs, Neo Jia

On Thu, 2025-02-27 at 01:35 +0000, Zhi Wang wrote:
> +/**
> + * DOC: GSP message handling policy
> + *
> + * When sending a GSP RPC command, there can be multiple cases of handling
> + * the GSP RPC messages, which are the reply of GSP RPC commands, according
> + * to the requirement of the callers and the nature of the GSP RPC
> commands.
> + *
> + * NVKM_GSP_RPC_REPLY_NOWAIT - If specified, immediately return to the
> + * caller after the GSP RPC command is issued.
> + *
> + * NVKM_GSP_RPC_REPLY_RECV - If specified, wait and receive the entire GSP
> + * RPC message after the GSP RPC command is issued.
> + *
> + */
> +enum nvkm_gsp_rpc_reply_policy {
> +	NVKM_GSP_RPC_REPLY_NOWAIT = 0,
> +	NVKM_GSP_RPC_REPLY_RECV,
> +};
> +

I don't think the "= 0" is necessary, as it doesn't matter what the value of
NVKM_GSP_RPC_REPLY_NOWAIT is, since you're not passing it to GSP-RM itself. 
You're only using it as a switch expression in r535_gsp_rpc_handle_reply().

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v3 1/2] drm/nouveau/nvkm: factor out current GSP RPC command policies
  2025-02-27  1:35 ` [PATCH v3 1/2] drm/nouveau/nvkm: factor out current GSP RPC command policies Zhi Wang
  2025-02-27  1:46   ` Timur Tabi
@ 2025-03-14  1:39   ` Alexandre Courbot
  2025-03-14  8:09     ` Zhi Wang
  1 sibling, 1 reply; 7+ messages in thread
From: Alexandre Courbot @ 2025-03-14  1:39 UTC (permalink / raw)
  To: Zhi Wang, nouveau
  Cc: airlied, daniel, dakr, lyude, bskeggs, acurrid, cjia, smitra,
	ankita, aniketa, kwankhede, targupta, zhiwang, Nouveau

Hi Zhi,

Thanks, this patch has become super easy to review.

On Thu Feb 27, 2025 at 10:35 AM JST, Zhi Wang wrote:
> There can be multiple cases of handling the GSP RPC messages, which are
> the reply of GSP RPC commands according to the requirement of the
> callers and the nature of the GSP RPC commands.
>
> The current supported reply policies are "callers don't care" and "receive
> the entire message" according to the requirement of the callers. To
> introduce a new policy, factor out the current RPC command reply polices.
> Also, centralize the handling of the reply in a single function.
>
> Factor out NVKM_GSP_RPC_REPLY_NOWAIT as "callers don't care" and
> NVKM_GSP_RPC_REPLY_RECV as "receive the entire message". Introduce a
> kernel doc to document the policies. Factor out
> r535_gsp_rpc_handle_reply().
>
> No functional change is intended for small GSP RPC commands. For large GSP
> commands, the caller decides the policy of how to handle the returned GSP
> RPC message.
>
> Cc: Ben Skeggs <bskeggs@nvidia.com>
> Cc: Alexandre Courbot <acourbot@nvidia.com>
> Signed-off-by: Zhi Wang <zhiw@nvidia.com>
> ---
>  Documentation/gpu/nouveau.rst                 |  3 +
>  .../gpu/drm/nouveau/include/nvkm/subdev/gsp.h | 34 +++++++--
>  .../gpu/drm/nouveau/nvkm/subdev/bar/r535.c    |  2 +-
>  .../gpu/drm/nouveau/nvkm/subdev/gsp/r535.c    | 75 ++++++++++---------
>  .../drm/nouveau/nvkm/subdev/instmem/r535.c    |  2 +-
>  5 files changed, 72 insertions(+), 44 deletions(-)
>
> diff --git a/Documentation/gpu/nouveau.rst b/Documentation/gpu/nouveau.rst
> index 0f34131ccc27..b8c801e0068c 100644
> --- a/Documentation/gpu/nouveau.rst
> +++ b/Documentation/gpu/nouveau.rst
> @@ -27,3 +27,6 @@ GSP Support
>  
>  .. kernel-doc:: drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
>     :doc: GSP message queue element
> +
> +.. kernel-doc:: drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h
> +   :doc: GSP message handling policy
> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h
> index 746e126c3ecf..e5fe44589bbd 100644
> --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h
> +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h
> @@ -31,6 +31,25 @@ typedef int (*nvkm_gsp_msg_ntfy_func)(void *priv, u32 fn, void *repv, u32 repc);
>  struct nvkm_gsp_event;
>  typedef void (*nvkm_gsp_event_func)(struct nvkm_gsp_event *, void *repv, u32 repc);
>  
> +/**
> + * DOC: GSP message handling policy
> + *
> + * When sending a GSP RPC command, there can be multiple cases of handling
> + * the GSP RPC messages, which are the reply of GSP RPC commands, according
> + * to the requirement of the callers and the nature of the GSP RPC commands.
> + *
> + * NVKM_GSP_RPC_REPLY_NOWAIT - If specified, immediately return to the
> + * caller after the GSP RPC command is issued.
> + *
> + * NVKM_GSP_RPC_REPLY_RECV - If specified, wait and receive the entire GSP
> + * RPC message after the GSP RPC command is issued.
> + *
> + */
> +enum nvkm_gsp_rpc_reply_policy {
> +	NVKM_GSP_RPC_REPLY_NOWAIT = 0,
> +	NVKM_GSP_RPC_REPLY_RECV,
> +};
> +
>  struct nvkm_gsp {
>  	const struct nvkm_gsp_func *func;
>  	struct nvkm_subdev subdev;
> @@ -188,7 +207,8 @@ struct nvkm_gsp {
>  
>  	const struct nvkm_gsp_rm {
>  		void *(*rpc_get)(struct nvkm_gsp *, u32 fn, u32 argc);
> -		void *(*rpc_push)(struct nvkm_gsp *, void *argv, bool wait, u32 repc);
> +		void *(*rpc_push)(struct nvkm_gsp *gsp, void *argv,
> +				  enum nvkm_gsp_rpc_reply_policy policy, u32 repc);
>  		void (*rpc_done)(struct nvkm_gsp *gsp, void *repv);
>  
>  		void *(*rm_ctrl_get)(struct nvkm_gsp_object *, u32 cmd, u32 argc);
> @@ -255,9 +275,10 @@ nvkm_gsp_rpc_get(struct nvkm_gsp *gsp, u32 fn, u32 argc)
>  }
>  
>  static inline void *
> -nvkm_gsp_rpc_push(struct nvkm_gsp *gsp, void *argv, bool wait, u32 repc)
> +nvkm_gsp_rpc_push(struct nvkm_gsp *gsp, void *argv,
> +		  enum nvkm_gsp_rpc_reply_policy policy, u32 repc)
>  {
> -	return gsp->rm->rpc_push(gsp, argv, wait, repc);
> +	return gsp->rm->rpc_push(gsp, argv, policy, repc);
>  }
>  
>  static inline void *
> @@ -268,13 +289,14 @@ nvkm_gsp_rpc_rd(struct nvkm_gsp *gsp, u32 fn, u32 argc)
>  	if (IS_ERR_OR_NULL(argv))
>  		return argv;
>  
> -	return nvkm_gsp_rpc_push(gsp, argv, true, argc);
> +	return nvkm_gsp_rpc_push(gsp, argv, NVKM_GSP_RPC_REPLY_RECV, argc);
>  }
>  
>  static inline int
> -nvkm_gsp_rpc_wr(struct nvkm_gsp *gsp, void *argv, bool wait)
> +nvkm_gsp_rpc_wr(struct nvkm_gsp *gsp, void *argv,
> +		enum nvkm_gsp_rpc_reply_policy policy)
>  {
> -	void *repv = nvkm_gsp_rpc_push(gsp, argv, wait, 0);
> +	void *repv = nvkm_gsp_rpc_push(gsp, argv, policy, 0);
>  
>  	if (IS_ERR(repv))
>  		return PTR_ERR(repv);
> diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/r535.c
> index 3a30bea30e36..90186f98065c 100644
> --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/r535.c
> +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/r535.c
> @@ -56,7 +56,7 @@ r535_bar_bar2_update_pde(struct nvkm_gsp *gsp, u64 addr)
>  	rpc->info.entryValue = addr ? ((addr >> 4) | 2) : 0; /* PD3 entry format! */
>  	rpc->info.entryLevelShift = 47; //XXX: probably fetch this from mmu!
>  
> -	return nvkm_gsp_rpc_wr(gsp, rpc, true);
> +	return nvkm_gsp_rpc_wr(gsp, rpc, NVKM_GSP_RPC_REPLY_RECV);
>  }
>  
>  static void
> diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
> index db2602e88006..f73dcc3e1c0d 100644
> --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
> +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
> @@ -585,13 +585,34 @@ r535_gsp_rpc_poll(struct nvkm_gsp *gsp, u32 fn)
>  }
>  
>  static void *
> -r535_gsp_rpc_send(struct nvkm_gsp *gsp, void *payload, bool wait,
> -		  u32 gsp_rpc_len)
> +r535_gsp_rpc_handle_reply(struct nvkm_gsp *gsp, u32 fn,
> +			  enum nvkm_gsp_rpc_reply_policy policy,
> +			  u32 gsp_rpc_len)
> +{
> +	struct nvfw_gsp_rpc *reply;
> +	void *repv = NULL;
> +
> +	switch (policy) {
> +	case NVKM_GSP_RPC_REPLY_NOWAIT:
> +		break;
> +	case NVKM_GSP_RPC_REPLY_RECV:
> +		reply = r535_gsp_msg_recv(gsp, fn, gsp_rpc_len);
> +		if (!IS_ERR_OR_NULL(reply))
> +			repv = reply->data;
> +		else
> +			repv = reply;
> +		break;
> +	}
> +
> +	return repv;
> +}
> +
> +static void *
> +r535_gsp_rpc_send(struct nvkm_gsp *gsp, void *payload,
> +		  enum nvkm_gsp_rpc_reply_policy policy, u32 gsp_rpc_len)
>  {
>  	struct nvfw_gsp_rpc *rpc = to_gsp_hdr(payload, rpc);
> -	struct nvfw_gsp_rpc *msg;
>  	u32 fn = rpc->function;
> -	void *repv = NULL;
>  	int ret;
>  
>  	if (gsp->subdev.debug >= NV_DBG_TRACE) {
> @@ -605,15 +626,7 @@ r535_gsp_rpc_send(struct nvkm_gsp *gsp, void *payload, bool wait,
>  	if (ret)
>  		return ERR_PTR(ret);
>  
> -	if (wait) {
> -		msg = r535_gsp_msg_recv(gsp, fn, gsp_rpc_len);
> -		if (!IS_ERR_OR_NULL(msg))
> -			repv = msg->data;
> -		else
> -			repv = msg;
> -	}
> -
> -	return repv;
> +	return r535_gsp_rpc_handle_reply(gsp, fn, policy, gsp_rpc_len);
>  }
>  
>  static void
> @@ -797,7 +810,7 @@ r535_gsp_rpc_rm_free(struct nvkm_gsp_object *object)
>  	rpc->params.hRoot = client->object.handle;
>  	rpc->params.hObjectParent = 0;
>  	rpc->params.hObjectOld = object->handle;
> -	return nvkm_gsp_rpc_wr(gsp, rpc, true);
> +	return nvkm_gsp_rpc_wr(gsp, rpc, NVKM_GSP_RPC_REPLY_RECV);
>  }
>  
>  static void
> @@ -815,7 +828,7 @@ r535_gsp_rpc_rm_alloc_push(struct nvkm_gsp_object *object, void *params)
>  	struct nvkm_gsp *gsp = object->client->gsp;
>  	void *ret = NULL;
>  
> -	rpc = nvkm_gsp_rpc_push(gsp, rpc, true, sizeof(*rpc));
> +	rpc = nvkm_gsp_rpc_push(gsp, rpc, NVKM_GSP_RPC_REPLY_RECV, sizeof(*rpc));
>  	if (IS_ERR_OR_NULL(rpc))
>  		return rpc;
>  
> @@ -876,7 +889,7 @@ r535_gsp_rpc_rm_ctrl_push(struct nvkm_gsp_object *object, void **params, u32 rep
>  	struct nvkm_gsp *gsp = object->client->gsp;
>  	int ret = 0;
>  
> -	rpc = nvkm_gsp_rpc_push(gsp, rpc, true, repc);
> +	rpc = nvkm_gsp_rpc_push(gsp, rpc, NVKM_GSP_RPC_REPLY_RECV, repc);
>  	if (IS_ERR_OR_NULL(rpc)) {
>  		*params = NULL;
>  		return PTR_ERR(rpc);
> @@ -948,8 +961,8 @@ r535_gsp_rpc_get(struct nvkm_gsp *gsp, u32 fn, u32 payload_size)
>  }
>  
>  static void *
> -r535_gsp_rpc_push(struct nvkm_gsp *gsp, void *payload, bool wait,
> -		  u32 gsp_rpc_len)
> +r535_gsp_rpc_push(struct nvkm_gsp *gsp, void *payload,
> +		  enum nvkm_gsp_rpc_reply_policy policy, u32 gsp_rpc_len)
>  {
>  	struct nvfw_gsp_rpc *rpc = to_gsp_hdr(payload, rpc);
>  	struct r535_gsp_msg *msg = to_gsp_hdr(rpc, msg);
> @@ -967,7 +980,7 @@ r535_gsp_rpc_push(struct nvkm_gsp *gsp, void *payload, bool wait,
>  		rpc->length = sizeof(*rpc) + max_payload_size;
>  		msg->checksum = rpc->length;
>  
> -		repv = r535_gsp_rpc_send(gsp, payload, false, 0);
> +		repv = r535_gsp_rpc_send(gsp, payload, NVKM_GSP_RPC_REPLY_NOWAIT, 0);
>  		if (IS_ERR(repv))
>  			goto done;
>  
> @@ -988,7 +1001,7 @@ r535_gsp_rpc_push(struct nvkm_gsp *gsp, void *payload, bool wait,
>  
>  			memcpy(next, payload, size);
>  
> -			repv = r535_gsp_rpc_send(gsp, next, false, 0);
> +			repv = r535_gsp_rpc_send(gsp, next, NVKM_GSP_RPC_REPLY_NOWAIT, 0);
>  			if (IS_ERR(repv))
>  				goto done;
>  
> @@ -997,20 +1010,10 @@ r535_gsp_rpc_push(struct nvkm_gsp *gsp, void *payload, bool wait,
>  		}
>  
>  		/* Wait for reply. */
> -		rpc = r535_gsp_msg_recv(gsp, fn, payload_size +
> -					sizeof(*rpc));
> -		if (!IS_ERR_OR_NULL(rpc)) {
> -			if (wait) {
> -				repv = rpc->data;
> -			} else {
> -				nvkm_gsp_rpc_done(gsp, rpc);
> -				repv = NULL;
> -			}
> -		} else {
> -			repv = wait ? rpc : NULL;
> -		}

This bit of code seems to have a slightly different flow from what
r535_gsp_rpc_handle_reply() does - it receives the response before
checking for the wait flag, while rpc_handle_reply() does things in the
opposite order. The new code also drops the call to nvkm_gsp_rpc_done().
Can you double-check and confirm this is ok?


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v3 1/2] drm/nouveau/nvkm: factor out current GSP RPC command policies
  2025-03-14  1:39   ` Alexandre Courbot
@ 2025-03-14  8:09     ` Zhi Wang
  0 siblings, 0 replies; 7+ messages in thread
From: Zhi Wang @ 2025-03-14  8:09 UTC (permalink / raw)
  To: Alexandre Courbot, nouveau@lists.freedesktop.org
  Cc: airlied@gmail.com, daniel@ffwll.ch, dakr@kernel.org,
	lyude@redhat.com, Ben Skeggs, Andy Currid, Neo Jia, Surath Mitra,
	Ankit Agrawal, Aniket Agashe, Kirti Wankhede,
	Tarun Gupta (SW-GPU), zhiwang@kernel.org, Nouveau

On 14/03/2025 3.39, Alexandre Courbot wrote:
> Hi Zhi,
> 
> Thanks, this patch has become super easy to review.
> 
> On Thu Feb 27, 2025 at 10:35 AM JST, Zhi Wang wrote:
>> There can be multiple cases of handling the GSP RPC messages, which are
>> the reply of GSP RPC commands according to the requirement of the
>> callers and the nature of the GSP RPC commands.
>>
>> The current supported reply policies are "callers don't care" and "receive
>> the entire message" according to the requirement of the callers. To
>> introduce a new policy, factor out the current RPC command reply polices.
>> Also, centralize the handling of the reply in a single function.
>>
>> Factor out NVKM_GSP_RPC_REPLY_NOWAIT as "callers don't care" and
>> NVKM_GSP_RPC_REPLY_RECV as "receive the entire message". Introduce a
>> kernel doc to document the policies. Factor out
>> r535_gsp_rpc_handle_reply().
>>
>> No functional change is intended for small GSP RPC commands. For large GSP
>> commands, the caller decides the policy of how to handle the returned GSP
>> RPC message.
>>
>> Cc: Ben Skeggs <bskeggs@nvidia.com>
>> Cc: Alexandre Courbot <acourbot@nvidia.com>
>> Signed-off-by: Zhi Wang <zhiw@nvidia.com>
>> ---
>>   Documentation/gpu/nouveau.rst                 |  3 +
>>   .../gpu/drm/nouveau/include/nvkm/subdev/gsp.h | 34 +++++++--
>>   .../gpu/drm/nouveau/nvkm/subdev/bar/r535.c    |  2 +-
>>   .../gpu/drm/nouveau/nvkm/subdev/gsp/r535.c    | 75 ++++++++++---------
>>   .../drm/nouveau/nvkm/subdev/instmem/r535.c    |  2 +-
>>   5 files changed, 72 insertions(+), 44 deletions(-)
>>
>> diff --git a/Documentation/gpu/nouveau.rst b/Documentation/gpu/nouveau.rst
>> index 0f34131ccc27..b8c801e0068c 100644
>> --- a/Documentation/gpu/nouveau.rst
>> +++ b/Documentation/gpu/nouveau.rst
>> @@ -27,3 +27,6 @@ GSP Support
>>   
>>   .. kernel-doc:: drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
>>      :doc: GSP message queue element
>> +
>> +.. kernel-doc:: drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h
>> +   :doc: GSP message handling policy
>> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h
>> index 746e126c3ecf..e5fe44589bbd 100644
>> --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h
>> +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h
>> @@ -31,6 +31,25 @@ typedef int (*nvkm_gsp_msg_ntfy_func)(void *priv, u32 fn, void *repv, u32 repc);
>>   struct nvkm_gsp_event;
>>   typedef void (*nvkm_gsp_event_func)(struct nvkm_gsp_event *, void *repv, u32 repc);
>>   
>> +/**
>> + * DOC: GSP message handling policy
>> + *
>> + * When sending a GSP RPC command, there can be multiple cases of handling
>> + * the GSP RPC messages, which are the reply of GSP RPC commands, according
>> + * to the requirement of the callers and the nature of the GSP RPC commands.
>> + *
>> + * NVKM_GSP_RPC_REPLY_NOWAIT - If specified, immediately return to the
>> + * caller after the GSP RPC command is issued.
>> + *
>> + * NVKM_GSP_RPC_REPLY_RECV - If specified, wait and receive the entire GSP
>> + * RPC message after the GSP RPC command is issued.
>> + *
>> + */
>> +enum nvkm_gsp_rpc_reply_policy {
>> +	NVKM_GSP_RPC_REPLY_NOWAIT = 0,
>> +	NVKM_GSP_RPC_REPLY_RECV,
>> +};
>> +
>>   struct nvkm_gsp {
>>   	const struct nvkm_gsp_func *func;
>>   	struct nvkm_subdev subdev;
>> @@ -188,7 +207,8 @@ struct nvkm_gsp {
>>   
>>   	const struct nvkm_gsp_rm {
>>   		void *(*rpc_get)(struct nvkm_gsp *, u32 fn, u32 argc);
>> -		void *(*rpc_push)(struct nvkm_gsp *, void *argv, bool wait, u32 repc);
>> +		void *(*rpc_push)(struct nvkm_gsp *gsp, void *argv,
>> +				  enum nvkm_gsp_rpc_reply_policy policy, u32 repc);
>>   		void (*rpc_done)(struct nvkm_gsp *gsp, void *repv);
>>   
>>   		void *(*rm_ctrl_get)(struct nvkm_gsp_object *, u32 cmd, u32 argc);
>> @@ -255,9 +275,10 @@ nvkm_gsp_rpc_get(struct nvkm_gsp *gsp, u32 fn, u32 argc)
>>   }
>>   
>>   static inline void *
>> -nvkm_gsp_rpc_push(struct nvkm_gsp *gsp, void *argv, bool wait, u32 repc)
>> +nvkm_gsp_rpc_push(struct nvkm_gsp *gsp, void *argv,
>> +		  enum nvkm_gsp_rpc_reply_policy policy, u32 repc)
>>   {
>> -	return gsp->rm->rpc_push(gsp, argv, wait, repc);
>> +	return gsp->rm->rpc_push(gsp, argv, policy, repc);
>>   }
>>   
>>   static inline void *
>> @@ -268,13 +289,14 @@ nvkm_gsp_rpc_rd(struct nvkm_gsp *gsp, u32 fn, u32 argc)
>>   	if (IS_ERR_OR_NULL(argv))
>>   		return argv;
>>   
>> -	return nvkm_gsp_rpc_push(gsp, argv, true, argc);
>> +	return nvkm_gsp_rpc_push(gsp, argv, NVKM_GSP_RPC_REPLY_RECV, argc);
>>   }
>>   
>>   static inline int
>> -nvkm_gsp_rpc_wr(struct nvkm_gsp *gsp, void *argv, bool wait)
>> +nvkm_gsp_rpc_wr(struct nvkm_gsp *gsp, void *argv,
>> +		enum nvkm_gsp_rpc_reply_policy policy)
>>   {
>> -	void *repv = nvkm_gsp_rpc_push(gsp, argv, wait, 0);
>> +	void *repv = nvkm_gsp_rpc_push(gsp, argv, policy, 0);
>>   
>>   	if (IS_ERR(repv))
>>   		return PTR_ERR(repv);
>> diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/r535.c
>> index 3a30bea30e36..90186f98065c 100644
>> --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/r535.c
>> +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/r535.c
>> @@ -56,7 +56,7 @@ r535_bar_bar2_update_pde(struct nvkm_gsp *gsp, u64 addr)
>>   	rpc->info.entryValue = addr ? ((addr >> 4) | 2) : 0; /* PD3 entry format! */
>>   	rpc->info.entryLevelShift = 47; //XXX: probably fetch this from mmu!
>>   
>> -	return nvkm_gsp_rpc_wr(gsp, rpc, true);
>> +	return nvkm_gsp_rpc_wr(gsp, rpc, NVKM_GSP_RPC_REPLY_RECV);
>>   }
>>   
>>   static void
>> diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
>> index db2602e88006..f73dcc3e1c0d 100644
>> --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
>> +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
>> @@ -585,13 +585,34 @@ r535_gsp_rpc_poll(struct nvkm_gsp *gsp, u32 fn)
>>   }
>>   
>>   static void *
>> -r535_gsp_rpc_send(struct nvkm_gsp *gsp, void *payload, bool wait,
>> -		  u32 gsp_rpc_len)
>> +r535_gsp_rpc_handle_reply(struct nvkm_gsp *gsp, u32 fn,
>> +			  enum nvkm_gsp_rpc_reply_policy policy,
>> +			  u32 gsp_rpc_len)
>> +{
>> +	struct nvfw_gsp_rpc *reply;
>> +	void *repv = NULL;
>> +
>> +	switch (policy) {
>> +	case NVKM_GSP_RPC_REPLY_NOWAIT:
>> +		break;
>> +	case NVKM_GSP_RPC_REPLY_RECV:
>> +		reply = r535_gsp_msg_recv(gsp, fn, gsp_rpc_len);
>> +		if (!IS_ERR_OR_NULL(reply))
>> +			repv = reply->data;
>> +		else
>> +			repv = reply;
>> +		break;
>> +	}
>> +
>> +	return repv;
>> +}
>> +
>> +static void *
>> +r535_gsp_rpc_send(struct nvkm_gsp *gsp, void *payload,
>> +		  enum nvkm_gsp_rpc_reply_policy policy, u32 gsp_rpc_len)
>>   {
>>   	struct nvfw_gsp_rpc *rpc = to_gsp_hdr(payload, rpc);
>> -	struct nvfw_gsp_rpc *msg;
>>   	u32 fn = rpc->function;
>> -	void *repv = NULL;
>>   	int ret;
>>   
>>   	if (gsp->subdev.debug >= NV_DBG_TRACE) {
>> @@ -605,15 +626,7 @@ r535_gsp_rpc_send(struct nvkm_gsp *gsp, void *payload, bool wait,
>>   	if (ret)
>>   		return ERR_PTR(ret);
>>   
>> -	if (wait) {
>> -		msg = r535_gsp_msg_recv(gsp, fn, gsp_rpc_len);
>> -		if (!IS_ERR_OR_NULL(msg))
>> -			repv = msg->data;
>> -		else
>> -			repv = msg;
>> -	}
>> -
>> -	return repv;
>> +	return r535_gsp_rpc_handle_reply(gsp, fn, policy, gsp_rpc_len);
>>   }
>>   
>>   static void
>> @@ -797,7 +810,7 @@ r535_gsp_rpc_rm_free(struct nvkm_gsp_object *object)
>>   	rpc->params.hRoot = client->object.handle;
>>   	rpc->params.hObjectParent = 0;
>>   	rpc->params.hObjectOld = object->handle;
>> -	return nvkm_gsp_rpc_wr(gsp, rpc, true);
>> +	return nvkm_gsp_rpc_wr(gsp, rpc, NVKM_GSP_RPC_REPLY_RECV);
>>   }
>>   
>>   static void
>> @@ -815,7 +828,7 @@ r535_gsp_rpc_rm_alloc_push(struct nvkm_gsp_object *object, void *params)
>>   	struct nvkm_gsp *gsp = object->client->gsp;
>>   	void *ret = NULL;
>>   
>> -	rpc = nvkm_gsp_rpc_push(gsp, rpc, true, sizeof(*rpc));
>> +	rpc = nvkm_gsp_rpc_push(gsp, rpc, NVKM_GSP_RPC_REPLY_RECV, sizeof(*rpc));
>>   	if (IS_ERR_OR_NULL(rpc))
>>   		return rpc;
>>   
>> @@ -876,7 +889,7 @@ r535_gsp_rpc_rm_ctrl_push(struct nvkm_gsp_object *object, void **params, u32 rep
>>   	struct nvkm_gsp *gsp = object->client->gsp;
>>   	int ret = 0;
>>   
>> -	rpc = nvkm_gsp_rpc_push(gsp, rpc, true, repc);
>> +	rpc = nvkm_gsp_rpc_push(gsp, rpc, NVKM_GSP_RPC_REPLY_RECV, repc);
>>   	if (IS_ERR_OR_NULL(rpc)) {
>>   		*params = NULL;
>>   		return PTR_ERR(rpc);
>> @@ -948,8 +961,8 @@ r535_gsp_rpc_get(struct nvkm_gsp *gsp, u32 fn, u32 payload_size)
>>   }
>>   
>>   static void *
>> -r535_gsp_rpc_push(struct nvkm_gsp *gsp, void *payload, bool wait,
>> -		  u32 gsp_rpc_len)
>> +r535_gsp_rpc_push(struct nvkm_gsp *gsp, void *payload,
>> +		  enum nvkm_gsp_rpc_reply_policy policy, u32 gsp_rpc_len)
>>   {
>>   	struct nvfw_gsp_rpc *rpc = to_gsp_hdr(payload, rpc);
>>   	struct r535_gsp_msg *msg = to_gsp_hdr(rpc, msg);
>> @@ -967,7 +980,7 @@ r535_gsp_rpc_push(struct nvkm_gsp *gsp, void *payload, bool wait,
>>   		rpc->length = sizeof(*rpc) + max_payload_size;
>>   		msg->checksum = rpc->length;
>>   
>> -		repv = r535_gsp_rpc_send(gsp, payload, false, 0);
>> +		repv = r535_gsp_rpc_send(gsp, payload, NVKM_GSP_RPC_REPLY_NOWAIT, 0);
>>   		if (IS_ERR(repv))
>>   			goto done;
>>   
>> @@ -988,7 +1001,7 @@ r535_gsp_rpc_push(struct nvkm_gsp *gsp, void *payload, bool wait,
>>   
>>   			memcpy(next, payload, size);
>>   
>> -			repv = r535_gsp_rpc_send(gsp, next, false, 0);
>> +			repv = r535_gsp_rpc_send(gsp, next, NVKM_GSP_RPC_REPLY_NOWAIT, 0);
>>   			if (IS_ERR(repv))
>>   				goto done;
>>   
>> @@ -997,20 +1010,10 @@ r535_gsp_rpc_push(struct nvkm_gsp *gsp, void *payload, bool wait,
>>   		}
>>   
>>   		/* Wait for reply. */
>> -		rpc = r535_gsp_msg_recv(gsp, fn, payload_size +
>> -					sizeof(*rpc));
>> -		if (!IS_ERR_OR_NULL(rpc)) {
>> -			if (wait) {
>> -				repv = rpc->data;
>> -			} else {
>> -				nvkm_gsp_rpc_done(gsp, rpc);
>> -				repv = NULL;
>> -			}
>> -		} else {
>> -			repv = wait ? rpc : NULL;
>> -		}
> 
> This bit of code seems to have a slightly different flow from what
> r535_gsp_rpc_handle_reply() does - it receives the response before
> checking for the wait flag, while rpc_handle_reply() does things in the
> opposite order. The new code also drops the call to nvkm_gsp_rpc_done().
> Can you double-check and confirm this is ok?
> 
Hi Alex:

Thanks for the comment and review.

Yes. The original code path will poll the message unconditionally when 
handling big GSP commands, which is to align with currently what OpenRM 
is doing.

As what mentioned in the comment, we unify the behavior between the 
small and big GSP message handling and make policies explicit and clear, 
instead of letting them be scattered in the different callers and 
sending/receiving functions, or behaves even differently in the same 
sending function. Because it is pretty much a strategy thing. As a 
sending function, I would like those strategies staying in the caller as 
much as it can be, and let the sending function just operate stuff as 
instructed.

So the caller decides how to handle the GSP message after sending the 
big GSP command: if the caller specifies a RECV policy, it needs to 
handle the GSP message vehicle lifecycle, 
alloc/free(nvkm_gsp_rpc_done()). If the caller specifies a POLL policy, 
it doesn't need to.

Back to the code, the only in-tree user of the big GSP command is the
NV_VGPU_MSG_FUNCTION_ALLOC_MEMORY and its policy is changed to POLL in 
the PATCH2. So there is no GSP message alloc/free involved in that path 
then.

Hope that make things clear.

Z.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH v3 2/2] drm/nouveau/nvkm: introduce new GSP reply policy NVKM_GSP_RPC_REPLY_POLL
  2025-02-27  1:35 [PATCH v3 0/2] NVKM GSP RPC message handling policy Zhi Wang
  2025-02-27  1:35 ` [PATCH v3 1/2] drm/nouveau/nvkm: factor out current GSP RPC command policies Zhi Wang
@ 2025-02-27  1:35 ` Zhi Wang
  2025-03-09 13:00 ` [PATCH v3 0/2] NVKM GSP RPC message handling policy Danilo Krummrich
  2 siblings, 0 replies; 7+ messages in thread
From: Zhi Wang @ 2025-02-27  1:35 UTC (permalink / raw)
  To: nouveau
  Cc: airlied, daniel, dakr, lyude, bskeggs, acurrid, cjia, smitra,
	ankita, aniketa, kwankhede, targupta, zhiw, zhiwang,
	Alexandre Courbot

Some GSP RPC commands need a new reply policy: "caller don't care about
the message content but want to make sure a reply is received". To
support this case, a new reply policy is introduced.

NV_VGPU_MSG_FUNCTION_ALLOC_MEMORY is a large GSP RPC command. The actual
required policy is NVKM_GSP_RPC_REPLY_POLL. This can be observed from the
dump of the GSP message queue. After the large GSP RPC command is issued,
GSP will write only an empty RPC header in the queue as the reply.

Without this change, the policy "receiving the entire message" is used
for NV_VGPU_MSG_FUNCTION_ALLOC_MEMORY. This causes the timeout of receiving
the returned GSP message in the suspend/resume path.

Introduce the new reply policy NVKM_GSP_RPC_REPLY_POLL, which waits for
the returned GSP message but discards it for the caller. Use the new policy
NVKM_GSP_RPC_REPLY_POLL on the GSP RPC command
NV_VGPU_MSG_FUNCTION_ALLOC_MEMORY.

Fixes: 50f290053d79 ("drm/nouveau: support handling the return of large GSP message")
Cc: Danilo Krummrich <dakr@kernel.org>
Cc: Alexandre Courbot <acourbot@nvidia.com>
Tested-by: Ben Skeggs <bskeggs@nvidia.com>
Signed-off-by: Zhi Wang <zhiw@nvidia.com>
---
 drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h  | 4 ++++
 drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c     | 3 +++
 drivers/gpu/drm/nouveau/nvkm/subdev/instmem/r535.c | 2 +-
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h
index e5fe44589bbd..1c12854a8550 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h
@@ -44,10 +44,14 @@ typedef void (*nvkm_gsp_event_func)(struct nvkm_gsp_event *, void *repv, u32 rep
  * NVKM_GSP_RPC_REPLY_RECV - If specified, wait and receive the entire GSP
  * RPC message after the GSP RPC command is issued.
  *
+ * NVKM_GSP_RPC_REPLY_POLL - If specified, wait for the specific reply and
+ * discard the reply before returning to the caller.
+ *
  */
 enum nvkm_gsp_rpc_reply_policy {
 	NVKM_GSP_RPC_REPLY_NOWAIT = 0,
 	NVKM_GSP_RPC_REPLY_RECV,
+	NVKM_GSP_RPC_REPLY_POLL,
 };
 
 struct nvkm_gsp {
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
index f73dcc3e1c0d..969f6b921fdb 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
@@ -602,6 +602,9 @@ r535_gsp_rpc_handle_reply(struct nvkm_gsp *gsp, u32 fn,
 		else
 			repv = reply;
 		break;
+	case NVKM_GSP_RPC_REPLY_POLL:
+		repv = r535_gsp_msg_recv(gsp, fn, 0);
+		break;
 	}
 
 	return repv;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/r535.c
index 2789efe9c100..35ba1798ee6e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/r535.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/r535.c
@@ -105,7 +105,7 @@ fbsr_memlist(struct nvkm_gsp_device *device, u32 handle, enum nvkm_memory_target
 			rpc->pteDesc.pte_pde[i].pte = (phys >> 12) + i;
 	}
 
-	ret = nvkm_gsp_rpc_wr(gsp, rpc, NVKM_GSP_RPC_REPLY_RECV);
+	ret = nvkm_gsp_rpc_wr(gsp, rpc, NVKM_GSP_RPC_REPLY_POLL);
 	if (ret)
 		return ret;
 
-- 
2.43.5


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH v3 0/2] NVKM GSP RPC message handling policy
  2025-02-27  1:35 [PATCH v3 0/2] NVKM GSP RPC message handling policy Zhi Wang
  2025-02-27  1:35 ` [PATCH v3 1/2] drm/nouveau/nvkm: factor out current GSP RPC command policies Zhi Wang
  2025-02-27  1:35 ` [PATCH v3 2/2] drm/nouveau/nvkm: introduce new GSP reply policy NVKM_GSP_RPC_REPLY_POLL Zhi Wang
@ 2025-03-09 13:00 ` Danilo Krummrich
  2 siblings, 0 replies; 7+ messages in thread
From: Danilo Krummrich @ 2025-03-09 13:00 UTC (permalink / raw)
  To: Zhi Wang
  Cc: nouveau, airlied, daniel, lyude, bskeggs, acurrid, cjia, smitra,
	ankita, aniketa, kwankhede, targupta, zhiwang

On Thu, Feb 27, 2025 at 01:35:52AM +0000, Zhi Wang wrote:
> Hi folks:
> 
> Here are the v3 version of NVKM GSP RPC message handling policy which
> solves the issue reported by Ben. More details can be found in v1 [1].
> 
> v3:
> 
> - Fix a hidden use-after-free caught by KFENCE in PATCH 1,
>   r535_gsp_rpc_send(). (Ben)
> 
> Zhi Wang (2):
>   drm/nouveau/nvkm: factor out current GSP RPC command policies
>   drm/nouveau/nvkm: introduce new GSP reply policy
>     NVKM_GSP_RPC_REPLY_POLL
> 
>  Documentation/gpu/nouveau.rst                 |  3 +
>  .../gpu/drm/nouveau/include/nvkm/subdev/gsp.h | 38 +++++++--
>  .../gpu/drm/nouveau/nvkm/subdev/bar/r535.c    |  2 +-
>  .../gpu/drm/nouveau/nvkm/subdev/gsp/r535.c    | 78 ++++++++++---------
>  .../drm/nouveau/nvkm/subdev/instmem/r535.c    |  2 +-
>  5 files changed, 79 insertions(+), 44 deletions(-)

Applied to drm-misc-next, thanks!

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2025-03-14  8:09 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-02-27  1:35 [PATCH v3 0/2] NVKM GSP RPC message handling policy Zhi Wang
2025-02-27  1:35 ` [PATCH v3 1/2] drm/nouveau/nvkm: factor out current GSP RPC command policies Zhi Wang
2025-02-27  1:46   ` Timur Tabi
2025-03-14  1:39   ` Alexandre Courbot
2025-03-14  8:09     ` Zhi Wang
2025-02-27  1:35 ` [PATCH v3 2/2] drm/nouveau/nvkm: introduce new GSP reply policy NVKM_GSP_RPC_REPLY_POLL Zhi Wang
2025-03-09 13:00 ` [PATCH v3 0/2] NVKM GSP RPC message handling policy Danilo Krummrich

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.