All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] [v2] drm/nouveau: retain device pointer in nvkm_gsp_mem object
@ 2024-09-10 21:56 Timur Tabi
  2024-09-10 21:56 ` [PATCH 2/2] [v8] drm/nouveau: expose GSP-RM logging buffers via debugfs Timur Tabi
  0 siblings, 1 reply; 5+ messages in thread
From: Timur Tabi @ 2024-09-10 21:56 UTC (permalink / raw)
  To: Danilo Krummrich, Dave Airlie, bskeggs, nouveau

Store the struct device pointer used to allocate the DMA buffer in
the nvkm_gsp_mem object.  This allows nvkm_gsp_mem_dtor() to release
the buffer without needing the nvkm_gsp.  This is needed so that
we can retain DMA buffers even after the nvkm_gsp object is deleted.

Signed-off-by: Timur Tabi <ttabi@nvidia.com>
---
 .../gpu/drm/nouveau/include/nvkm/subdev/gsp.h |  1 +
 .../gpu/drm/nouveau/nvkm/subdev/gsp/r535.c    | 50 ++++++++++++-------
 2 files changed, 34 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h
index 9e6f39912368..a45a4ad843b9 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h
@@ -9,6 +9,7 @@
 #define GSP_PAGE_SIZE  BIT(GSP_PAGE_SHIFT)
 
 struct nvkm_gsp_mem {
+	struct device *dev;
 	size_t size;
 	void *data;
 	dma_addr_t addr;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
index cf58f9da9139..86450b0cd605 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
@@ -1000,7 +1000,7 @@ r535_gsp_rpc_get_gsp_static_info(struct nvkm_gsp *gsp)
 }
 
 static void
-nvkm_gsp_mem_dtor(struct nvkm_gsp *gsp, struct nvkm_gsp_mem *mem)
+nvkm_gsp_mem_dtor(struct nvkm_gsp_mem *mem)
 {
 	if (mem->data) {
 		/*
@@ -1009,19 +1009,35 @@ nvkm_gsp_mem_dtor(struct nvkm_gsp *gsp, struct nvkm_gsp_mem *mem)
 		 */
 		memset(mem->data, 0xFF, mem->size);
 
-		dma_free_coherent(gsp->subdev.device->dev, mem->size, mem->data, mem->addr);
+		dma_free_coherent(mem->dev, mem->size, mem->data, mem->addr);
+		put_device(mem->dev);
+
 		memset(mem, 0, sizeof(*mem));
 	}
 }
 
+/**
+ * nvkm_gsp_mem_ctor - constructor for nvkm_gsp_mem objects
+ * @gsp: gsp pointer
+ * @size: number of bytes to allocate
+ * @mem: nvkm_gsp_mem object to initialize
+ *
+ * Allocates a block of memory for use with GSP.
+ *
+ * This memory block can potentially out-live the driver's remove() callback,
+ * so we take a device reference to ensure its lifetime. The reference is
+ * dropped in the destructor.
+ */
 static int
 nvkm_gsp_mem_ctor(struct nvkm_gsp *gsp, size_t size, struct nvkm_gsp_mem *mem)
 {
-	mem->size = size;
 	mem->data = dma_alloc_coherent(gsp->subdev.device->dev, size, &mem->addr, GFP_KERNEL);
 	if (WARN_ON(!mem->data))
 		return -ENOMEM;
 
+	mem->size = size;
+	mem->dev = get_device(gsp->subdev.device->dev);
+
 	return 0;
 }
 
@@ -1054,8 +1070,8 @@ r535_gsp_postinit(struct nvkm_gsp *gsp)
 	nvkm_wr32(device, 0x110004, 0x00000040);
 
 	/* Release the DMA buffers that were needed only for boot and init */
-	nvkm_gsp_mem_dtor(gsp, &gsp->boot.fw);
-	nvkm_gsp_mem_dtor(gsp, &gsp->libos);
+	nvkm_gsp_mem_dtor(&gsp->boot.fw);
+	nvkm_gsp_mem_dtor(&gsp->libos);
 
 	return ret;
 }
@@ -2234,8 +2250,8 @@ static void
 nvkm_gsp_radix3_dtor(struct nvkm_gsp *gsp, struct nvkm_gsp_radix3 *rx3)
 {
 	nvkm_gsp_sg_free(gsp->subdev.device, &rx3->lvl2);
-	nvkm_gsp_mem_dtor(gsp, &rx3->lvl1);
-	nvkm_gsp_mem_dtor(gsp, &rx3->lvl0);
+	nvkm_gsp_mem_dtor(&rx3->lvl1);
+	nvkm_gsp_mem_dtor(&rx3->lvl0);
 }
 
 /**
@@ -2323,9 +2339,9 @@ nvkm_gsp_radix3_sg(struct nvkm_gsp *gsp, struct sg_table *sgt, u64 size,
 
 	if (ret) {
 lvl2_fail:
-		nvkm_gsp_mem_dtor(gsp, &rx3->lvl1);
+		nvkm_gsp_mem_dtor(&rx3->lvl1);
 lvl1_fail:
-		nvkm_gsp_mem_dtor(gsp, &rx3->lvl0);
+		nvkm_gsp_mem_dtor(&rx3->lvl0);
 	}
 
 	return ret;
@@ -2417,7 +2433,7 @@ r535_gsp_init(struct nvkm_gsp *gsp)
 
 done:
 	if (gsp->sr.meta.data) {
-		nvkm_gsp_mem_dtor(gsp, &gsp->sr.meta);
+		nvkm_gsp_mem_dtor(&gsp->sr.meta);
 		nvkm_gsp_radix3_dtor(gsp, &gsp->sr.radix3);
 		nvkm_gsp_sg_free(gsp->subdev.device, &gsp->sr.sgt);
 		return ret;
@@ -2498,7 +2514,7 @@ r535_gsp_dtor(struct nvkm_gsp *gsp)
 	mutex_destroy(&gsp->client_id.mutex);
 
 	nvkm_gsp_radix3_dtor(gsp, &gsp->radix3);
-	nvkm_gsp_mem_dtor(gsp, &gsp->sig);
+	nvkm_gsp_mem_dtor(&gsp->sig);
 	nvkm_firmware_dtor(&gsp->fw);
 
 	nvkm_falcon_fw_dtor(&gsp->booter.unload);
@@ -2509,12 +2525,12 @@ r535_gsp_dtor(struct nvkm_gsp *gsp)
 
 	r535_gsp_dtor_fws(gsp);
 
-	nvkm_gsp_mem_dtor(gsp, &gsp->rmargs);
-	nvkm_gsp_mem_dtor(gsp, &gsp->wpr_meta);
-	nvkm_gsp_mem_dtor(gsp, &gsp->shm.mem);
-	nvkm_gsp_mem_dtor(gsp, &gsp->loginit);
-	nvkm_gsp_mem_dtor(gsp, &gsp->logintr);
-	nvkm_gsp_mem_dtor(gsp, &gsp->logrm);
+	nvkm_gsp_mem_dtor(&gsp->rmargs);
+	nvkm_gsp_mem_dtor(&gsp->wpr_meta);
+	nvkm_gsp_mem_dtor(&gsp->shm.mem);
+	nvkm_gsp_mem_dtor(&gsp->loginit);
+	nvkm_gsp_mem_dtor(&gsp->logintr);
+	nvkm_gsp_mem_dtor(&gsp->logrm);
 }
 
 int

base-commit: f327bfdbf6c6d7d8e5402795c7c97fb97c2dcf79
prerequisite-patch-id: 6f7d619f67878dea2a3378e76e28d3c666921fbd
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 2/2] [v8] drm/nouveau: expose GSP-RM logging buffers via debugfs
  2024-09-10 21:56 [PATCH 1/2] [v2] drm/nouveau: retain device pointer in nvkm_gsp_mem object Timur Tabi
@ 2024-09-10 21:56 ` Timur Tabi
  2024-10-03 21:48   ` Danilo Krummrich
  0 siblings, 1 reply; 5+ messages in thread
From: Timur Tabi @ 2024-09-10 21:56 UTC (permalink / raw)
  To: Danilo Krummrich, Dave Airlie, bskeggs, nouveau

The LOGINIT, LOGINTR, LOGRM, and LOGPMU buffers are circular buffers
that have printf-like logs from GSP-RM and PMU encoded in them.

LOGINIT, LOGINTR, and LOGRM are allocated by Nouveau and their DMA
addresses are passed to GSP-RM during initialization. The buffers are
required for GSP-RM to initialize properly.

LOGPMU is also allocated by Nouveau, but its contents are updated
when Nouveau receives an NV_VGPU_MSG_EVENT_UCODE_LIBOS_PRINT RPC from
GSP-RM. Nouveau then copies the RPC to the buffer.

The messages are encoded as an array of variable-length structures that
contain the parameters to an NV_PRINTF call. The format string and
parameter count are stored in a special ELF image that contains only
logging strings. This image is not currently shipped with the Nvidia
driver.

There are two methods to extract the logs.

OpenRM tries to load the logging ELF, and if present, parses the log
buffers in real time and outputs the strings to the kernel console.

Alternatively, and this is the method used by this patch, the buffers
can be exposed to user space, and a user-space tool (along with the
logging ELF image) can parse the buffer and dump the logs.

This method has the advantage that it allows the buffers to be parsed
even when the logging ELF file is not available to the user. However,
it has the disadvantage the debugfs entries need to remain until the
driver is unloaded.

The buffers are exposed via debugfs. If GSP-RM fails to initialize, then
Nouveau immediately shuts down the GSP interface. This would normally
also deallocate the logging buffers, thereby preventing the user from
capturing the debug logs.

To avoid this, introduce the keep-gsp-logging command line parameter. If
specified, and if at least one logging buffer has content, then Nouveau
will migrate these buffers into new debugfs entries that are retained
until the driver unloads.

An end-user can capture the logs using the following commands:

    cp /sys/kernel/debug/nouveau/<path>/loginit loginit
    cp /sys/kernel/debug/nouveau/<path>/logrm logrm
    cp /sys/kernel/debug/nouveau/<path>/logintr logintr
    cp /sys/kernel/debug/nouveau/<path>/logpmu logpmu

where (for a PCI device) <path> is the PCI ID of the GPU (e.g.
0000:65:00.0).

Since LOGPMU is not needed for normal GSP-RM operation, it is only
created if debugfs is available. Otherwise, the
NV_VGPU_MSG_EVENT_UCODE_LIBOS_PRINT RPCs are ignored.

A simple way to test the buffer migration feature is to have
nvkm_gsp_init() return an error code.

Signed-off-by: Timur Tabi <ttabi@nvidia.com>
---
v8:
 Rebased onto drm-next 6.11.0-rc5
 Using /sys/kernel/debug/nouveau instead of debug/dri
 Note: this is an extensive change from v7, so please review thoroughly.

 drivers/gpu/drm/nouveau/include/nvif/log.h    |  57 ++
 .../gpu/drm/nouveau/include/nvkm/subdev/gsp.h |  18 +
 drivers/gpu/drm/nouveau/nouveau_drm.c         |  12 +
 .../gpu/drm/nouveau/nvkm/subdev/gsp/r535.c    | 496 +++++++++++++++++-
 4 files changed, 582 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/nouveau/include/nvif/log.h

diff --git a/drivers/gpu/drm/nouveau/include/nvif/log.h b/drivers/gpu/drm/nouveau/include/nvif/log.h
new file mode 100644
index 000000000000..a2bf3be08113
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/include/nvif/log.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: MIT */
+/* SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. */
+
+#ifndef __NVIF_LOG_H__
+#define __NVIF_LOG_H__
+
+/**
+ * nvif_log - structure for tracking logging buffers
+ * @entry: an entry in a list of struct nvif_logs
+ * @shutdown: pointer to function to call to clean up
+ *
+ * Structure used to track logging buffers so that they can be cleaned up
+ * when the module exits.
+ *
+ * The @shutdown function is called when the module exits. It should free all
+ * backing resources, such as logging buffers.
+ */
+struct nvif_log {
+	struct list_head entry;
+	void (*shutdown)(struct nvif_log *log);
+};
+
+/**
+ * nvif_logs - linked list of nvif_log objects
+ */
+struct nvif_logs {
+	struct list_head head;
+};
+
+#define NVIF_LOGS_DECLARE(logs) \
+	struct nvif_logs logs = { LIST_HEAD_INIT(logs.head) }
+
+static inline void nvif_log_shutdown(struct nvif_logs *logs)
+{
+	if (!list_empty(&logs->head)) {
+		struct nvif_log *log, *n;
+
+		list_for_each_entry_safe(log, n, &logs->head, entry) {
+			/* shutdown() should also delete the log entry */
+			log->shutdown(log);
+		}
+	}
+}
+
+#ifdef CONFIG_DEBUG_FS
+/**
+ * gsp_logs - list of nvif_log GSP-RM logging buffers
+ *
+ * Head pointer to a a list of nvif_log buffers that is created for each GPU
+ * upon GSP shutdown if the "keep_gsp_logging" command-line parameter is
+ * specified. This is used to track the alternative debugfs entries for the
+ * GSP-RM logs.
+ */
+extern struct nvif_logs gsp_logs;
+#endif
+
+#endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h
index a45a4ad843b9..474e8fec69ef 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h
@@ -5,6 +5,8 @@
 #include <core/falcon.h>
 #include <core/firmware.h>
 
+#include <linux/debugfs.h>
+
 #define GSP_PAGE_SHIFT 12
 #define GSP_PAGE_SIZE  BIT(GSP_PAGE_SHIFT)
 
@@ -220,6 +222,22 @@ struct nvkm_gsp {
 
 	/* The size of the registry RPC */
 	size_t registry_rpc_size;
+
+#ifdef CONFIG_DEBUG_FS
+	/*
+	 * Logging buffers in debugfs. The wrapper objects need to remain
+	 * in memory until the dentry is deleted.
+	 */
+	struct dentry *dir;		/* Parent dentry */
+	struct dentry *dir_init;
+	struct dentry *dir_rm;
+	struct dentry *dir_intr;
+	struct dentry *dir_pmu;
+	struct debugfs_blob_wrapper blob_init;
+	struct debugfs_blob_wrapper blob_intr;
+	struct debugfs_blob_wrapper blob_rm;
+	struct debugfs_blob_wrapper blob_pmu;
+#endif
 };
 
 static inline bool
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index 4a9a9b9c3935..be6ee71a7ae7 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -46,6 +46,7 @@
 #include <nvif/fifo.h>
 #include <nvif/push006c.h>
 #include <nvif/user.h>
+#include <nvif/log.h>
 
 #include <nvif/class.h>
 #include <nvif/cl0002.h>
@@ -112,6 +113,13 @@ static struct drm_driver driver_stub;
 static struct drm_driver driver_pci;
 static struct drm_driver driver_platform;
 
+#ifdef CONFIG_DEBUG_FS
+/**
+ * gsp_logs - list of GSP debugfs logging buffers
+ */
+NVIF_LOGS_DECLARE(gsp_logs);
+#endif
+
 static u64
 nouveau_pci_name(struct pci_dev *pdev)
 {
@@ -1460,6 +1468,10 @@ nouveau_drm_exit(void)
 #endif
 	if (IS_ENABLED(CONFIG_DRM_NOUVEAU_SVM))
 		mmu_notifier_synchronize();
+
+#ifdef CONFIG_DEBUG_FS
+	nvif_log_shutdown(&gsp_logs);
+#endif
 }
 
 module_init(nouveau_drm_init);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
index 86450b0cd605..593279030eb6 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
@@ -26,6 +26,8 @@
 #include <subdev/vfn.h>
 #include <engine/fifo/chan.h>
 #include <engine/sec2.h>
+#include <drm/drm_device.h>
+#include <nvif/log.h>
 
 #include <nvfw/fw.h>
 
@@ -2076,6 +2078,265 @@ r535_gsp_rmargs_init(struct nvkm_gsp *gsp, bool resume)
 	return 0;
 }
 
+#ifdef CONFIG_DEBUG_FS
+
+/*
+ * If GSP-RM load fails, then the GSP nvkm object will be deleted, the logging
+ * debugfs entries will be deleted, and it will not be possible to debug the
+ * load failure. The keep_gsp_logging parameter tells Nouveau to copy the
+ * logging buffers to new debugfs entries, and these entries are retained
+ * until the driver unloads.
+ */
+static bool keep_gsp_logging;
+module_param(keep_gsp_logging, bool, 0444);
+MODULE_PARM_DESC(keep_gsp_logging,
+		 "Migrate the GSP-RM logging debugfs entries upon exit");
+
+/*
+ * The debugfs root for all devices.  Normally we'd use /sys/kernel/debug/dri.,
+ * but that path may not exist when we need it.  So create a new root
+ * /sys/kernel/debug/nouveau/.
+ *
+ * We take a reference every time a dentry under the root is created.  When
+ * the last reference is released, the root is deleted.
+ */
+static struct {
+	struct mutex mutex; /* Protects dentry */
+	struct dentry *dentry;
+	struct kref kref;
+} root = {
+	.mutex = __MUTEX_INITIALIZER(root.mutex),
+	.kref = KREF_INIT(0),
+	.dentry = NULL,
+};
+
+static void release_root_dentry(struct kref *ref)
+{
+	mutex_lock(&root.mutex);
+	debugfs_remove(root.dentry);
+	root.dentry = NULL;
+	mutex_unlock(&root.mutex);
+}
+
+/*
+ * GSP-RM uses a pseudo-class mechanism to define of a variety of per-"engine"
+ * data structures, and each engine has a "class ID" genererated by a
+ * pre-processor. This is the class ID for the PMU.
+ */
+#define NV_GSP_MSG_EVENT_UCODE_LIBOS_CLASS_PMU		0xf3d722
+
+/**
+ * rpc_ucode_libos_print_v1E_08 - RPC payload for libos print buffers
+ * @ucode_eng_desc: the engine descriptor
+ * @libos_print_buf_size: the size of the libos_print_buf[]
+ * @libos_print_buf: the actual buffer
+ *
+ * The engine descriptor is divided into 31:8 "class ID" and 7:0 "instance
+ * ID". We only care about messages from PMU.
+ */
+struct rpc_ucode_libos_print_v1e_08 {
+	u32 ucode_eng_desc;
+	u32 libos_print_buf_size;
+	u8 libos_print_buf[];
+};
+
+/**
+ * r535_gsp_msg_libos_print - capture log message from the PMU
+ * @priv: gsp pointer
+ * @fn: function number (ignored)
+ * @repv: pointer to libos print RPC
+ * @repc: message size
+ *
+ * Called when we receive a UCODE_LIBOS_PRINT event RPC from GSP-RM. This RPC
+ * contains the contents of the libos print buffer from PMU. It is typically
+ * only written to when PMU encounters an error.
+ *
+ * Technically this RPC can be used to pass print buffers from any number of
+ * GSP-RM engines, but we only expect to receive them for the PMU.
+ *
+ * For the PMU, the buffer is 4K in size and the RPC always contains the full
+ * contents.
+ */
+static int
+r535_gsp_msg_libos_print(void *priv, u32 fn, void *repv, u32 repc)
+{
+	struct nvkm_gsp *gsp = priv;
+	struct nvkm_subdev *subdev = &gsp->subdev;
+	struct rpc_ucode_libos_print_v1e_08 *rpc = repv;
+	unsigned int class = rpc->ucode_eng_desc >> 8;
+
+	nvkm_debug(subdev, "received libos print from class 0x%x for %u bytes\n",
+		   class, rpc->libos_print_buf_size);
+
+	if (class != NV_GSP_MSG_EVENT_UCODE_LIBOS_CLASS_PMU) {
+		nvkm_warn(subdev,
+			  "received libos print from unknown class 0x%x\n",
+			  class);
+		return -ENOMSG;
+	}
+
+	if (rpc->libos_print_buf_size > GSP_PAGE_SIZE) {
+		nvkm_error(subdev, "libos print is too large (%u bytes)\n",
+			   rpc->libos_print_buf_size);
+		return -E2BIG;
+	}
+
+	memcpy(gsp->blob_pmu.data, rpc->libos_print_buf, rpc->libos_print_buf_size);
+
+	return 0;
+}
+
+/**
+ * create_debufgs - create a blob debugfs entry
+ * @name: filename
+ * @parent: parent
+ * @blob: blob
+ *
+ * Creates a debugfs entry for a logging buffer with the name 'name'.
+ */
+static struct dentry *create_debugfs(struct nvkm_gsp *gsp, const char *name,
+				     struct debugfs_blob_wrapper *blob)
+{
+	struct dentry *dir;
+
+	dir = debugfs_create_blob(name, 0444, gsp->dir, blob);
+	if (IS_ERR(dir)) {
+		nvkm_error(&gsp->subdev,
+			   "failed to create %s debugfs entry\n", name);
+		return NULL;
+	}
+
+	/*
+	 * For some reason, debugfs_create_blob doesn't set the size of the
+	 * dentry, so do that here.
+	 */
+	i_size_write(d_inode(dir), blob->size);
+
+	return dir;
+}
+
+/**
+ * r535_gsp_libos_debugfs_init - create logging debugfs entries
+ * @gsp: gsp pointer
+ *
+ * Create the debugfs entries. This exposes the log buffers to userspace so
+ * that an external tool can parse it.
+ *
+ * The 'logpmu' contains exception dumps from the PMU. It is written via an
+ * RPC sent from GSP-RM and must be only 4KB. We create it here because it's
+ * only useful if there is a debugfs entry to expose it. If we get the PMU
+ * logging RPC and there is no debugfs entry, the RPC is just ignored.
+ *
+ * The blob_init, blob_rm, and blob_pmu objects can't be transient
+ * because debugfs_create_blob doesn't copy them.
+ *
+ * NOTE: OpenRM loads the logging elf image and prints the log messages
+ * in real-time. We may add that capability in the future, but that
+ * requires loading ELF images that are not distributed with the driver and
+ * adding the parsing code to Nouveau.
+ *
+ * Ideally, this should be part of nouveau_debugfs_init(), but that function
+ * is called too late. We really want to create these debugfs entries before
+ * r535_gsp_booter_load() is called, so that if GSP-RM fails to initialize,
+ * there could still be a log to capture.
+ */
+static void
+r535_gsp_libos_debugfs_init(struct nvkm_gsp *gsp)
+{
+	struct device *dev = gsp->subdev.device->dev;
+
+	mutex_lock(&root.mutex);
+
+	/* Create /sys/kerne/debug/nouveau if it doesn't already exist */
+	if (!root.dentry) {
+		root.dentry = debugfs_create_dir("nouveau", NULL);
+		if (PTR_ERR_OR_ZERO(root.dentry)) {
+			nvkm_error(&gsp->subdev, "could not create debugfs root\n");
+			mutex_unlock(&root.mutex);
+			return;
+		}
+		kref_init(&root.kref);
+	} else {
+		kref_get(&root.kref);
+	}
+
+	/* Create a new debugfs directory with a name unique to this GPU. */
+	gsp->dir = debugfs_create_dir(dev_name(dev), root.dentry);
+	if (IS_ERR(gsp->dir)) {
+		nvkm_error(&gsp->subdev,
+			   "failed to create %s debugfs root\n", dev_name(dev));
+		return;
+	}
+
+	/* Take a reference every time we create a new per-GPU dentry */
+	mutex_unlock(&root.mutex);
+
+	gsp->blob_init.data = gsp->loginit.data;
+	gsp->blob_init.size = gsp->loginit.size;
+	gsp->blob_intr.data = gsp->logintr.data;
+	gsp->blob_intr.size = gsp->logintr.size;
+	gsp->blob_rm.data = gsp->logrm.data;
+	gsp->blob_rm.size = gsp->logrm.size;
+
+	gsp->dir_init = create_debugfs(gsp, "loginit", &gsp->blob_init);
+	if (!gsp->dir_init)
+		goto error;
+
+	gsp->dir_intr = debugfs_create_blob("logintr", 0444, gsp->dir, &gsp->blob_intr);
+	if (IS_ERR(gsp->dir_intr)) {
+		nvkm_error(&gsp->subdev, "failed to create logintr debugfs entry\n");
+		goto error;
+	}
+
+	gsp->dir_rm = debugfs_create_blob("logrm", 0444, gsp->dir, &gsp->blob_rm);
+	if (IS_ERR(gsp->dir_rm)) {
+		nvkm_error(&gsp->subdev, "failed to create logrm debugfs entry\n");
+		goto error;
+	}
+
+	/*
+	 * Since the PMU buffer is copied from an RPC, it doesn't need to be
+	 * a DMA buffer.
+	 */
+	gsp->blob_pmu.size = GSP_PAGE_SIZE;
+	gsp->blob_pmu.data = kzalloc(gsp->blob_pmu.size, GFP_KERNEL);
+	if (!gsp->blob_pmu.data)
+		goto error;
+
+	gsp->dir_pmu = debugfs_create_blob("logpmu", 0444, gsp->dir, &gsp->blob_pmu);
+	if (IS_ERR(gsp->dir_pmu)) {
+		nvkm_error(&gsp->subdev, "failed to create logpmu debugfs entry\n");
+		kfree(gsp->blob_pmu.data);
+		goto error;
+	}
+
+	i_size_write(d_inode(gsp->dir_init), gsp->blob_init.size);
+	i_size_write(d_inode(gsp->dir_intr), gsp->blob_intr.size);
+	i_size_write(d_inode(gsp->dir_rm), gsp->blob_rm.size);
+	i_size_write(d_inode(gsp->dir_pmu), gsp->blob_pmu.size);
+
+	r535_gsp_msg_ntfy_add(gsp, NV_VGPU_MSG_EVENT_UCODE_LIBOS_PRINT,
+			      r535_gsp_msg_libos_print, gsp);
+
+	nvkm_debug(&gsp->subdev, "created debugfs GSP-RM logging entries\n");
+
+	if (keep_gsp_logging) {
+		nvkm_info(&gsp->subdev,
+			  "logging buffers will be retained on failure\n");
+	}
+
+	return;
+
+error:
+	mutex_lock(&root.mutex);
+	debugfs_remove(gsp->dir);
+	/* This dentry failed to be created, so put the reference back */
+	kref_put(&root.kref, release_root_dentry);
+	mutex_unlock(&root.mutex);
+}
+
+#endif
+
 static inline u64
 r535_gsp_libos_id8(const char *name)
 {
@@ -2126,7 +2387,11 @@ static void create_pte_array(u64 *ptes, dma_addr_t addr, size_t size)
  * written to directly by GSP-RM and can be any multiple of GSP_PAGE_SIZE.
  *
  * The physical address map for the log buffer is stored in the buffer
- * itself, starting with offset 1. Offset 0 contains the "put" pointer.
+ * itself, starting with offset 1. Offset 0 contains the "put" pointer (pp).
+ * Initially, pp is equal to 0. If the buffer has valid logging data in it,
+ * then pp points to index into the buffer where the next logging entry will
+ * be written. Therefore, the logging data is valid if:
+ *   1 <= pp < sizeof(buffer)/sizeof(u64)
  *
  * The GSP only understands 4K pages (GSP_PAGE_SIZE), so even if the kernel is
  * configured for a larger page size (e.g. 64K pages), we need to give
@@ -2197,6 +2462,11 @@ r535_gsp_libos_init(struct nvkm_gsp *gsp)
 	args[3].size = gsp->rmargs.size;
 	args[3].kind = LIBOS_MEMORY_REGION_CONTIGUOUS;
 	args[3].loc  = LIBOS_MEMORY_REGION_LOC_SYSMEM;
+
+#ifdef CONFIG_DEBUG_FS
+	r535_gsp_libos_debugfs_init(gsp);
+#endif
+
 	return 0;
 }
 
@@ -2507,6 +2777,227 @@ r535_gsp_dtor_fws(struct nvkm_gsp *gsp)
 	gsp->fws.rm = NULL;
 }
 
+#ifdef CONFIG_DEBUG_FS
+
+struct r535_gsp_log {
+	struct nvif_log log;
+
+	/*
+	 * Logging buffers in debugfs. The wrapper objects need to remain
+	 * in memory until the dentry is deleted.
+	 */
+	struct dentry *debugfs_logging_dir;
+	struct debugfs_blob_wrapper blob_init;
+	struct debugfs_blob_wrapper blob_intr;
+	struct debugfs_blob_wrapper blob_rm;
+	struct debugfs_blob_wrapper blob_pmu;
+};
+
+/**
+ * r535_debugfs_shutdown - delete GSP-RM logging buffers for one GPU
+ * @_log: nvif_log struct for this GPU
+ *
+ * Called when the driver is shutting down, to clean up the retained GSP-RM
+ * logging buffers.
+ */
+static void r535_debugfs_shutdown(struct nvif_log *_log)
+{
+	struct r535_gsp_log *log = container_of(_log, struct r535_gsp_log, log);
+
+	debugfs_remove(log->debugfs_logging_dir);
+	kref_put(&root.kref, release_root_dentry);
+
+	kfree(log->blob_init.data);
+	kfree(log->blob_intr.data);
+	kfree(log->blob_rm.data);
+	kfree(log->blob_pmu.data);
+
+	/* We also need to delete the list object */
+	kfree(log);
+}
+
+/**
+ * is_empty - return true if the logging buffer was never written to
+ * @b: blob wrapper with ->data field pointing to logging buffer
+ *
+ * The first 64-bit field of loginit, and logintr, and logrm is the 'put'
+ * pointer, and it is initialized to 0. It's a dword-based index into the
+ * circular buffer, indicating where the next printf write will be made.
+ *
+ * If the pointer is still 0 when GSP-RM is shut down, that means that the
+ * buffer was never written to, so it can be ignored.
+ *
+ * This test also works for logpmu, even though it doesn't have a put pointer.
+ */
+static bool is_empty(const struct debugfs_blob_wrapper *b)
+{
+	u64 *put = b->data;
+
+	return put ? (*put == 0) : true;
+}
+
+/**
+ * r535_gsp_copy_log - preserve the logging buffers in a blob
+ *
+ * When GSP shuts down, the nvkm_gsp object and all its memory is deleted.
+ * To preserve the logging buffers, the buffers need to be copied, but only
+ * if they actually have data.
+ */
+static int r535_gsp_copy_log(struct dentry *parent,
+			     const char *name,
+			     const struct debugfs_blob_wrapper *s,
+			     struct debugfs_blob_wrapper *t)
+{
+	struct dentry *dir;
+	void *p;
+
+	if (is_empty(s))
+		return 0;
+
+	/* The original buffers will be deleted*/
+	p = kmemdup(s->data, s->size, GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	t->data = p;
+	t->size = s->size;
+
+	dir = debugfs_create_blob(name, 0444, parent, t);
+	if (IS_ERR(dir)) {
+		kfree(p);
+		memset(t, 0, sizeof(*t));
+		return PTR_ERR(dir);
+	}
+
+	return 0;
+}
+
+/**
+ * r535_gsp_retain_logging - copy logging buffers to new debugfs root
+ * @gsp: gsp pointer
+ *
+ * If keep_gsp_logging is enabled, then we want to preserve the GSP-RM logging
+ * buffers and their debugfs entries, but all those objects would normally
+ * deleted if GSP-RM fails to load.
+ *
+ * To preserve the logging buffers, we need to:
+ *
+ * 1) Allocate new buffers and copy the logs into them, so that the original
+ * DMA buffers can be released.
+ *
+ * 2) Preserve the directories.  We don't need to save single dentries because
+ * we're going to delete the parent when the
+ *
+ * If anything fails in this process, then all the dentries need to be
+ * deleted.  We don't need to deallocate the original logging buffers because
+ * the caller will do that regardless.
+ */
+static void r535_gsp_retain_logging(struct nvkm_gsp *gsp)
+{
+	struct device *dev = gsp->subdev.device->dev;
+	struct dentry *dir;
+	struct r535_gsp_log *log = NULL;
+	int ret;
+
+	if (!keep_gsp_logging || !gsp->dir) {
+		/* Nothing to do */
+		goto exit;
+	}
+
+	/* Check to make sure at least one buffer has data. */
+	if (is_empty(&gsp->blob_init) && is_empty(&gsp->blob_intr) &&
+	    is_empty(&gsp->blob_rm) && is_empty(&gsp->blob_rm)) {
+		nvkm_warn(&gsp->subdev, "all logging buffers are empty\n");
+		goto exit;
+	}
+
+	log = kzalloc(sizeof(*log), GFP_KERNEL);
+	if (!log) {
+		debugfs_remove(dir);
+		goto error;
+	}
+
+	/*
+	 * Since the nvkm_gsp object is going away, the debugfs_blob_wrapper
+	 * objects are also being deleted, which means the dentries will no
+	 * longer be valid.  Delete the existing entries so that we can create
+	 * new ones with the same name.
+	 */
+	debugfs_remove(gsp->dir_init);
+	debugfs_remove(gsp->dir_intr);
+	debugfs_remove(gsp->dir_rm);
+	debugfs_remove(gsp->dir_pmu);
+
+	ret = r535_gsp_copy_log(gsp->dir, "loginit", &gsp->blob_init, &log->blob_init);
+	if (ret)
+		goto error;
+
+	ret = r535_gsp_copy_log(gsp->dir, "logintr", &gsp->blob_intr, &log->blob_intr);
+	if (ret)
+		goto error;
+
+	ret = r535_gsp_copy_log(gsp->dir, "logrm", &gsp->blob_rm, &log->blob_rm);
+	if (ret)
+		goto error;
+
+	ret = r535_gsp_copy_log(gsp->dir, "logpmu", &gsp->blob_pmu, &log->blob_pmu);
+	if (ret)
+		goto error;
+
+	/* The nvkm_gsp object is going away, so save the dentry */
+	log->debugfs_logging_dir = gsp->dir;
+
+	log->log.shutdown = r535_debugfs_shutdown;
+	list_add(&log->log.entry, &gsp_logs.head);
+
+	nvkm_warn(&gsp->subdev,
+		  "logging buffers migrated to /sys/kernel/debug/nouveau/%s\n",
+		  dev_name(dev));
+
+	return;
+
+error:
+	nvkm_warn(&gsp->subdev, "failed to migrate logging buffers\n");
+
+exit:
+	mutex_lock(&root.mutex);
+	debugfs_remove(gsp->dir);
+	kref_put(&root.kref, release_root_dentry);
+	mutex_unlock(&root.mutex);
+
+	if (log) {
+		kfree(log->blob_init.data);
+		kfree(log->blob_intr.data);
+		kfree(log->blob_rm.data);
+		kfree(log->blob_pmu.data);
+		kfree(log);
+	}
+}
+
+#endif
+
+/**
+ * r535_gsp_libos_debugfs_fini - cleanup/retain log buffers on shutdown
+ * @gsp: gsp pointer
+ *
+ * If the log buffers are exposed via debugfs, the data for those entries
+ * needs to be cleaned up when the GSP device shuts down.
+ */
+static void
+r535_gsp_libos_debugfs_fini(struct nvkm_gsp __maybe_unused *gsp)
+{
+#ifdef CONFIG_DEBUG_FS
+	r535_gsp_retain_logging(gsp);
+
+	/*
+	 * Unlike the other buffers, the PMU blob is a kmalloc'd buffer that
+	 * exists only if the debugfs entries were created.
+	 */
+	kfree(gsp->blob_pmu.data);
+	gsp->blob_pmu.data = NULL;
+#endif
+}
+
 void
 r535_gsp_dtor(struct nvkm_gsp *gsp)
 {
@@ -2528,6 +3019,9 @@ r535_gsp_dtor(struct nvkm_gsp *gsp)
 	nvkm_gsp_mem_dtor(&gsp->rmargs);
 	nvkm_gsp_mem_dtor(&gsp->wpr_meta);
 	nvkm_gsp_mem_dtor(&gsp->shm.mem);
+
+	r535_gsp_libos_debugfs_fini(gsp);
+
 	nvkm_gsp_mem_dtor(&gsp->loginit);
 	nvkm_gsp_mem_dtor(&gsp->logintr);
 	nvkm_gsp_mem_dtor(&gsp->logrm);
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH 2/2] [v8] drm/nouveau: expose GSP-RM logging buffers via debugfs
  2024-09-10 21:56 ` [PATCH 2/2] [v8] drm/nouveau: expose GSP-RM logging buffers via debugfs Timur Tabi
@ 2024-10-03 21:48   ` Danilo Krummrich
  2024-10-29 19:50     ` Timur Tabi
  0 siblings, 1 reply; 5+ messages in thread
From: Danilo Krummrich @ 2024-10-03 21:48 UTC (permalink / raw)
  To: Timur Tabi; +Cc: Dave Airlie, bskeggs, nouveau

On Tue, Sep 10, 2024 at 04:56:16PM -0500, Timur Tabi wrote:
> The LOGINIT, LOGINTR, LOGRM, and LOGPMU buffers are circular buffers
> that have printf-like logs from GSP-RM and PMU encoded in them.
> 
> LOGINIT, LOGINTR, and LOGRM are allocated by Nouveau and their DMA
> addresses are passed to GSP-RM during initialization. The buffers are
> required for GSP-RM to initialize properly.
> 
> LOGPMU is also allocated by Nouveau, but its contents are updated
> when Nouveau receives an NV_VGPU_MSG_EVENT_UCODE_LIBOS_PRINT RPC from
> GSP-RM. Nouveau then copies the RPC to the buffer.
> 
> The messages are encoded as an array of variable-length structures that
> contain the parameters to an NV_PRINTF call. The format string and
> parameter count are stored in a special ELF image that contains only
> logging strings. This image is not currently shipped with the Nvidia
> driver.
> 
> There are two methods to extract the logs.
> 
> OpenRM tries to load the logging ELF, and if present, parses the log
> buffers in real time and outputs the strings to the kernel console.
> 
> Alternatively, and this is the method used by this patch, the buffers
> can be exposed to user space, and a user-space tool (along with the
> logging ELF image) can parse the buffer and dump the logs.
> 
> This method has the advantage that it allows the buffers to be parsed
> even when the logging ELF file is not available to the user. However,
> it has the disadvantage the debugfs entries need to remain until the
> driver is unloaded.
> 
> The buffers are exposed via debugfs. If GSP-RM fails to initialize, then
> Nouveau immediately shuts down the GSP interface. This would normally
> also deallocate the logging buffers, thereby preventing the user from
> capturing the debug logs.
> 
> To avoid this, introduce the keep-gsp-logging command line parameter. If
> specified, and if at least one logging buffer has content, then Nouveau
> will migrate these buffers into new debugfs entries that are retained
> until the driver unloads.
> 
> An end-user can capture the logs using the following commands:
> 
>     cp /sys/kernel/debug/nouveau/<path>/loginit loginit
>     cp /sys/kernel/debug/nouveau/<path>/logrm logrm
>     cp /sys/kernel/debug/nouveau/<path>/logintr logintr
>     cp /sys/kernel/debug/nouveau/<path>/logpmu logpmu
> 
> where (for a PCI device) <path> is the PCI ID of the GPU (e.g.
> 0000:65:00.0).
> 
> Since LOGPMU is not needed for normal GSP-RM operation, it is only
> created if debugfs is available. Otherwise, the
> NV_VGPU_MSG_EVENT_UCODE_LIBOS_PRINT RPCs are ignored.
> 
> A simple way to test the buffer migration feature is to have
> nvkm_gsp_init() return an error code.
> 
> Signed-off-by: Timur Tabi <ttabi@nvidia.com>
> ---
> v8:
>  Rebased onto drm-next 6.11.0-rc5
>  Using /sys/kernel/debug/nouveau instead of debug/dri
>  Note: this is an extensive change from v7, so please review thoroughly.
> 
>  drivers/gpu/drm/nouveau/include/nvif/log.h    |  57 ++
>  .../gpu/drm/nouveau/include/nvkm/subdev/gsp.h |  18 +
>  drivers/gpu/drm/nouveau/nouveau_drm.c         |  12 +
>  .../gpu/drm/nouveau/nvkm/subdev/gsp/r535.c    | 496 +++++++++++++++++-
>  4 files changed, 582 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/gpu/drm/nouveau/include/nvif/log.h
> 
> diff --git a/drivers/gpu/drm/nouveau/include/nvif/log.h b/drivers/gpu/drm/nouveau/include/nvif/log.h
> new file mode 100644
> index 000000000000..a2bf3be08113
> --- /dev/null
> +++ b/drivers/gpu/drm/nouveau/include/nvif/log.h
> @@ -0,0 +1,57 @@
> +/* SPDX-License-Identifier: MIT */
> +/* SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. */
> +
> +#ifndef __NVIF_LOG_H__
> +#define __NVIF_LOG_H__
> +
> +/**
> + * nvif_log - structure for tracking logging buffers
> + * @entry: an entry in a list of struct nvif_logs
> + * @shutdown: pointer to function to call to clean up
> + *
> + * Structure used to track logging buffers so that they can be cleaned up
> + * when the module exits.
> + *
> + * The @shutdown function is called when the module exits. It should free all
> + * backing resources, such as logging buffers.
> + */
> +struct nvif_log {
> +	struct list_head entry;
> +	void (*shutdown)(struct nvif_log *log);
> +};
> +
> +/**
> + * nvif_logs - linked list of nvif_log objects
> + */
> +struct nvif_logs {
> +	struct list_head head;
> +};
> +
> +#define NVIF_LOGS_DECLARE(logs) \
> +	struct nvif_logs logs = { LIST_HEAD_INIT(logs.head) }
> +
> +static inline void nvif_log_shutdown(struct nvif_logs *logs)
> +{
> +	if (!list_empty(&logs->head)) {
> +		struct nvif_log *log, *n;
> +
> +		list_for_each_entry_safe(log, n, &logs->head, entry) {
> +			/* shutdown() should also delete the log entry */
> +			log->shutdown(log);
> +		}
> +	}
> +}
> +
> +#ifdef CONFIG_DEBUG_FS
> +/**
> + * gsp_logs - list of nvif_log GSP-RM logging buffers
> + *
> + * Head pointer to a a list of nvif_log buffers that is created for each GPU
> + * upon GSP shutdown if the "keep_gsp_logging" command-line parameter is
> + * specified. This is used to track the alternative debugfs entries for the
> + * GSP-RM logs.
> + */
> +extern struct nvif_logs gsp_logs;
> +#endif
> +
> +#endif
> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h
> index a45a4ad843b9..474e8fec69ef 100644
> --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h
> +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h
> @@ -5,6 +5,8 @@
>  #include <core/falcon.h>
>  #include <core/firmware.h>
>  
> +#include <linux/debugfs.h>
> +
>  #define GSP_PAGE_SHIFT 12
>  #define GSP_PAGE_SIZE  BIT(GSP_PAGE_SHIFT)
>  
> @@ -220,6 +222,22 @@ struct nvkm_gsp {
>  
>  	/* The size of the registry RPC */
>  	size_t registry_rpc_size;
> +
> +#ifdef CONFIG_DEBUG_FS
> +	/*
> +	 * Logging buffers in debugfs. The wrapper objects need to remain
> +	 * in memory until the dentry is deleted.
> +	 */
> +	struct dentry *dir;		/* Parent dentry */
> +	struct dentry *dir_init;
> +	struct dentry *dir_rm;
> +	struct dentry *dir_intr;
> +	struct dentry *dir_pmu;

I think `dir` is confusing, maybe just `dent_*`? Or maybe just wrap all those in
a `struct { ... } debugfs;` and just name them `init`, `rm`, etc.?

> +	struct debugfs_blob_wrapper blob_init;
> +	struct debugfs_blob_wrapper blob_intr;
> +	struct debugfs_blob_wrapper blob_rm;
> +	struct debugfs_blob_wrapper blob_pmu;
> +#endif
>  };
>  
>  static inline bool
> diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
> index 4a9a9b9c3935..be6ee71a7ae7 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_drm.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
> @@ -46,6 +46,7 @@
>  #include <nvif/fifo.h>
>  #include <nvif/push006c.h>
>  #include <nvif/user.h>
> +#include <nvif/log.h>
>  
>  #include <nvif/class.h>
>  #include <nvif/cl0002.h>
> @@ -112,6 +113,13 @@ static struct drm_driver driver_stub;
>  static struct drm_driver driver_pci;
>  static struct drm_driver driver_platform;
>  
> +#ifdef CONFIG_DEBUG_FS
> +/**
> + * gsp_logs - list of GSP debugfs logging buffers
> + */
> +NVIF_LOGS_DECLARE(gsp_logs);
> +#endif
> +
>  static u64
>  nouveau_pci_name(struct pci_dev *pdev)
>  {
> @@ -1460,6 +1468,10 @@ nouveau_drm_exit(void)
>  #endif
>  	if (IS_ENABLED(CONFIG_DRM_NOUVEAU_SVM))
>  		mmu_notifier_synchronize();
> +
> +#ifdef CONFIG_DEBUG_FS
> +	nvif_log_shutdown(&gsp_logs);
> +#endif
>  }
>  
>  module_init(nouveau_drm_init);
> diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
> index 86450b0cd605..593279030eb6 100644
> --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
> +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
> @@ -26,6 +26,8 @@
>  #include <subdev/vfn.h>
>  #include <engine/fifo/chan.h>
>  #include <engine/sec2.h>
> +#include <drm/drm_device.h>
> +#include <nvif/log.h>
>  
>  #include <nvfw/fw.h>
>  
> @@ -2076,6 +2078,265 @@ r535_gsp_rmargs_init(struct nvkm_gsp *gsp, bool resume)
>  	return 0;
>  }
>  
> +#ifdef CONFIG_DEBUG_FS
> +
> +/*
> + * If GSP-RM load fails, then the GSP nvkm object will be deleted, the logging
> + * debugfs entries will be deleted, and it will not be possible to debug the
> + * load failure. The keep_gsp_logging parameter tells Nouveau to copy the
> + * logging buffers to new debugfs entries, and these entries are retained
> + * until the driver unloads.
> + */
> +static bool keep_gsp_logging;
> +module_param(keep_gsp_logging, bool, 0444);
> +MODULE_PARM_DESC(keep_gsp_logging,
> +		 "Migrate the GSP-RM logging debugfs entries upon exit");
> +
> +/*
> + * The debugfs root for all devices.  Normally we'd use /sys/kernel/debug/dri.,
> + * but that path may not exist when we need it.  So create a new root
> + * /sys/kernel/debug/nouveau/.
> + *
> + * We take a reference every time a dentry under the root is created.  When
> + * the last reference is released, the root is deleted.
> + */
> +static struct {
> +	struct mutex mutex; /* Protects dentry */
> +	struct dentry *dentry;
> +	struct kref kref;
> +} root = {
> +	.mutex = __MUTEX_INITIALIZER(root.mutex),
> +	.kref = KREF_INIT(0),
> +	.dentry = NULL,
> +};
> +
> +static void release_root_dentry(struct kref *ref)
> +{
> +	mutex_lock(&root.mutex);
> +	debugfs_remove(root.dentry);
> +	root.dentry = NULL;
> +	mutex_unlock(&root.mutex);
> +}

I think all this should go into module_init() and module_exit(), then you don't
need the mutex and all the reference counts.

> +
> +/*
> + * GSP-RM uses a pseudo-class mechanism to define of a variety of per-"engine"
> + * data structures, and each engine has a "class ID" genererated by a
> + * pre-processor. This is the class ID for the PMU.
> + */
> +#define NV_GSP_MSG_EVENT_UCODE_LIBOS_CLASS_PMU		0xf3d722
> +
> +/**
> + * rpc_ucode_libos_print_v1E_08 - RPC payload for libos print buffers
> + * @ucode_eng_desc: the engine descriptor
> + * @libos_print_buf_size: the size of the libos_print_buf[]
> + * @libos_print_buf: the actual buffer
> + *
> + * The engine descriptor is divided into 31:8 "class ID" and 7:0 "instance
> + * ID". We only care about messages from PMU.
> + */
> +struct rpc_ucode_libos_print_v1e_08 {
> +	u32 ucode_eng_desc;
> +	u32 libos_print_buf_size;
> +	u8 libos_print_buf[];
> +};
> +
> +/**
> + * r535_gsp_msg_libos_print - capture log message from the PMU
> + * @priv: gsp pointer
> + * @fn: function number (ignored)
> + * @repv: pointer to libos print RPC
> + * @repc: message size
> + *
> + * Called when we receive a UCODE_LIBOS_PRINT event RPC from GSP-RM. This RPC
> + * contains the contents of the libos print buffer from PMU. It is typically
> + * only written to when PMU encounters an error.
> + *
> + * Technically this RPC can be used to pass print buffers from any number of
> + * GSP-RM engines, but we only expect to receive them for the PMU.
> + *
> + * For the PMU, the buffer is 4K in size and the RPC always contains the full
> + * contents.
> + */
> +static int
> +r535_gsp_msg_libos_print(void *priv, u32 fn, void *repv, u32 repc)
> +{
> +	struct nvkm_gsp *gsp = priv;
> +	struct nvkm_subdev *subdev = &gsp->subdev;
> +	struct rpc_ucode_libos_print_v1e_08 *rpc = repv;
> +	unsigned int class = rpc->ucode_eng_desc >> 8;
> +
> +	nvkm_debug(subdev, "received libos print from class 0x%x for %u bytes\n",
> +		   class, rpc->libos_print_buf_size);
> +
> +	if (class != NV_GSP_MSG_EVENT_UCODE_LIBOS_CLASS_PMU) {
> +		nvkm_warn(subdev,
> +			  "received libos print from unknown class 0x%x\n",
> +			  class);
> +		return -ENOMSG;
> +	}
> +
> +	if (rpc->libos_print_buf_size > GSP_PAGE_SIZE) {
> +		nvkm_error(subdev, "libos print is too large (%u bytes)\n",
> +			   rpc->libos_print_buf_size);
> +		return -E2BIG;
> +	}
> +
> +	memcpy(gsp->blob_pmu.data, rpc->libos_print_buf, rpc->libos_print_buf_size);
> +
> +	return 0;
> +}
> +
> +/**
> + * create_debufgs - create a blob debugfs entry
> + * @name: filename
> + * @parent: parent
> + * @blob: blob
> + *
> + * Creates a debugfs entry for a logging buffer with the name 'name'.
> + */
> +static struct dentry *create_debugfs(struct nvkm_gsp *gsp, const char *name,
> +				     struct debugfs_blob_wrapper *blob)
> +{
> +	struct dentry *dir;

I think `dir` is confusing, what about `dent` or `entry`?

> +
> +	dir = debugfs_create_blob(name, 0444, gsp->dir, blob);
> +	if (IS_ERR(dir)) {
> +		nvkm_error(&gsp->subdev,
> +			   "failed to create %s debugfs entry\n", name);
> +		return NULL;
> +	}
> +
> +	/*
> +	 * For some reason, debugfs_create_blob doesn't set the size of the
> +	 * dentry, so do that here.
> +	 */
> +	i_size_write(d_inode(dir), blob->size);

I think debugfs entries typically don't need this. Do we need it?

> +
> +	return dir;
> +}
> +
> +/**
> + * r535_gsp_libos_debugfs_init - create logging debugfs entries
> + * @gsp: gsp pointer
> + *
> + * Create the debugfs entries. This exposes the log buffers to userspace so
> + * that an external tool can parse it.
> + *
> + * The 'logpmu' contains exception dumps from the PMU. It is written via an
> + * RPC sent from GSP-RM and must be only 4KB. We create it here because it's
> + * only useful if there is a debugfs entry to expose it. If we get the PMU
> + * logging RPC and there is no debugfs entry, the RPC is just ignored.
> + *
> + * The blob_init, blob_rm, and blob_pmu objects can't be transient
> + * because debugfs_create_blob doesn't copy them.
> + *
> + * NOTE: OpenRM loads the logging elf image and prints the log messages
> + * in real-time. We may add that capability in the future, but that
> + * requires loading ELF images that are not distributed with the driver and
> + * adding the parsing code to Nouveau.
> + *
> + * Ideally, this should be part of nouveau_debugfs_init(), but that function
> + * is called too late. We really want to create these debugfs entries before
> + * r535_gsp_booter_load() is called, so that if GSP-RM fails to initialize,
> + * there could still be a log to capture.
> + */
> +static void
> +r535_gsp_libos_debugfs_init(struct nvkm_gsp *gsp)
> +{
> +	struct device *dev = gsp->subdev.device->dev;
> +
> +	mutex_lock(&root.mutex);
> +
> +	/* Create /sys/kerne/debug/nouveau if it doesn't already exist */
> +	if (!root.dentry) {
> +		root.dentry = debugfs_create_dir("nouveau", NULL);
> +		if (PTR_ERR_OR_ZERO(root.dentry)) {
> +			nvkm_error(&gsp->subdev, "could not create debugfs root\n");
> +			mutex_unlock(&root.mutex);
> +			return;
> +		}
> +		kref_init(&root.kref);
> +	} else {
> +		kref_get(&root.kref);
> +	}
> +
> +	/* Create a new debugfs directory with a name unique to this GPU. */
> +	gsp->dir = debugfs_create_dir(dev_name(dev), root.dentry);
> +	if (IS_ERR(gsp->dir)) {
> +		nvkm_error(&gsp->subdev,
> +			   "failed to create %s debugfs root\n", dev_name(dev));
> +		return;
> +	}
> +
> +	/* Take a reference every time we create a new per-GPU dentry */
> +	mutex_unlock(&root.mutex);
> +
> +	gsp->blob_init.data = gsp->loginit.data;
> +	gsp->blob_init.size = gsp->loginit.size;
> +	gsp->blob_intr.data = gsp->logintr.data;
> +	gsp->blob_intr.size = gsp->logintr.size;
> +	gsp->blob_rm.data = gsp->logrm.data;
> +	gsp->blob_rm.size = gsp->logrm.size;
> +
> +	gsp->dir_init = create_debugfs(gsp, "loginit", &gsp->blob_init);

Here you use your helper, but for...

> +	if (!gsp->dir_init)
> +		goto error;
> +
> +	gsp->dir_intr = debugfs_create_blob("logintr", 0444, gsp->dir, &gsp->blob_intr);
> +	if (IS_ERR(gsp->dir_intr)) {
> +		nvkm_error(&gsp->subdev, "failed to create logintr debugfs entry\n");
> +		goto error;
> +	}
> +
> +	gsp->dir_rm = debugfs_create_blob("logrm", 0444, gsp->dir, &gsp->blob_rm);
> +	if (IS_ERR(gsp->dir_rm)) {
> +		nvkm_error(&gsp->subdev, "failed to create logrm debugfs entry\n");
> +		goto error;
> +	}
> +
> +	/*
> +	 * Since the PMU buffer is copied from an RPC, it doesn't need to be
> +	 * a DMA buffer.
> +	 */
> +	gsp->blob_pmu.size = GSP_PAGE_SIZE;
> +	gsp->blob_pmu.data = kzalloc(gsp->blob_pmu.size, GFP_KERNEL);
> +	if (!gsp->blob_pmu.data)
> +		goto error;
> +
> +	gsp->dir_pmu = debugfs_create_blob("logpmu", 0444, gsp->dir, &gsp->blob_pmu);
> +	if (IS_ERR(gsp->dir_pmu)) {
> +		nvkm_error(&gsp->subdev, "failed to create logpmu debugfs entry\n");
> +		kfree(gsp->blob_pmu.data);
> +		goto error;
> +	}
> +
> +	i_size_write(d_inode(gsp->dir_init), gsp->blob_init.size);
> +	i_size_write(d_inode(gsp->dir_intr), gsp->blob_intr.size);
> +	i_size_write(d_inode(gsp->dir_rm), gsp->blob_rm.size);
> +	i_size_write(d_inode(gsp->dir_pmu), gsp->blob_pmu.size);

...all those, it seems you forgot to switch to your helper.

> +
> +	r535_gsp_msg_ntfy_add(gsp, NV_VGPU_MSG_EVENT_UCODE_LIBOS_PRINT,
> +			      r535_gsp_msg_libos_print, gsp);
> +
> +	nvkm_debug(&gsp->subdev, "created debugfs GSP-RM logging entries\n");
> +
> +	if (keep_gsp_logging) {
> +		nvkm_info(&gsp->subdev,
> +			  "logging buffers will be retained on failure\n");
> +	}
> +
> +	return;
> +
> +error:
> +	mutex_lock(&root.mutex);
> +	debugfs_remove(gsp->dir);
> +	/* This dentry failed to be created, so put the reference back */
> +	kref_put(&root.kref, release_root_dentry);
> +	mutex_unlock(&root.mutex);
> +}
> +
> +#endif
> +
>  static inline u64
>  r535_gsp_libos_id8(const char *name)
>  {
> @@ -2126,7 +2387,11 @@ static void create_pte_array(u64 *ptes, dma_addr_t addr, size_t size)
>   * written to directly by GSP-RM and can be any multiple of GSP_PAGE_SIZE.
>   *
>   * The physical address map for the log buffer is stored in the buffer
> - * itself, starting with offset 1. Offset 0 contains the "put" pointer.
> + * itself, starting with offset 1. Offset 0 contains the "put" pointer (pp).
> + * Initially, pp is equal to 0. If the buffer has valid logging data in it,
> + * then pp points to index into the buffer where the next logging entry will
> + * be written. Therefore, the logging data is valid if:
> + *   1 <= pp < sizeof(buffer)/sizeof(u64)
>   *
>   * The GSP only understands 4K pages (GSP_PAGE_SIZE), so even if the kernel is
>   * configured for a larger page size (e.g. 64K pages), we need to give
> @@ -2197,6 +2462,11 @@ r535_gsp_libos_init(struct nvkm_gsp *gsp)
>  	args[3].size = gsp->rmargs.size;
>  	args[3].kind = LIBOS_MEMORY_REGION_CONTIGUOUS;
>  	args[3].loc  = LIBOS_MEMORY_REGION_LOC_SYSMEM;
> +
> +#ifdef CONFIG_DEBUG_FS
> +	r535_gsp_libos_debugfs_init(gsp);
> +#endif
> +
>  	return 0;
>  }
>  
> @@ -2507,6 +2777,227 @@ r535_gsp_dtor_fws(struct nvkm_gsp *gsp)
>  	gsp->fws.rm = NULL;
>  }
>  
> +#ifdef CONFIG_DEBUG_FS
> +
> +struct r535_gsp_log {
> +	struct nvif_log log;
> +
> +	/*
> +	 * Logging buffers in debugfs. The wrapper objects need to remain
> +	 * in memory until the dentry is deleted.
> +	 */
> +	struct dentry *debugfs_logging_dir;
> +	struct debugfs_blob_wrapper blob_init;
> +	struct debugfs_blob_wrapper blob_intr;
> +	struct debugfs_blob_wrapper blob_rm;
> +	struct debugfs_blob_wrapper blob_pmu;
> +};
> +
> +/**
> + * r535_debugfs_shutdown - delete GSP-RM logging buffers for one GPU
> + * @_log: nvif_log struct for this GPU
> + *
> + * Called when the driver is shutting down, to clean up the retained GSP-RM
> + * logging buffers.
> + */
> +static void r535_debugfs_shutdown(struct nvif_log *_log)
> +{
> +	struct r535_gsp_log *log = container_of(_log, struct r535_gsp_log, log);
> +
> +	debugfs_remove(log->debugfs_logging_dir);
> +	kref_put(&root.kref, release_root_dentry);
> +
> +	kfree(log->blob_init.data);
> +	kfree(log->blob_intr.data);
> +	kfree(log->blob_rm.data);
> +	kfree(log->blob_pmu.data);
> +
> +	/* We also need to delete the list object */
> +	kfree(log);
> +}
> +
> +/**
> + * is_empty - return true if the logging buffer was never written to
> + * @b: blob wrapper with ->data field pointing to logging buffer
> + *
> + * The first 64-bit field of loginit, and logintr, and logrm is the 'put'
> + * pointer, and it is initialized to 0. It's a dword-based index into the
> + * circular buffer, indicating where the next printf write will be made.
> + *
> + * If the pointer is still 0 when GSP-RM is shut down, that means that the
> + * buffer was never written to, so it can be ignored.
> + *
> + * This test also works for logpmu, even though it doesn't have a put pointer.
> + */
> +static bool is_empty(const struct debugfs_blob_wrapper *b)
> +{
> +	u64 *put = b->data;
> +
> +	return put ? (*put == 0) : true;
> +}
> +
> +/**
> + * r535_gsp_copy_log - preserve the logging buffers in a blob
> + *
> + * When GSP shuts down, the nvkm_gsp object and all its memory is deleted.
> + * To preserve the logging buffers, the buffers need to be copied, but only
> + * if they actually have data.
> + */
> +static int r535_gsp_copy_log(struct dentry *parent,
> +			     const char *name,
> +			     const struct debugfs_blob_wrapper *s,
> +			     struct debugfs_blob_wrapper *t)
> +{
> +	struct dentry *dir;

Again, I wouldn't use `dir`.

> +	void *p;
> +
> +	if (is_empty(s))
> +		return 0;
> +
> +	/* The original buffers will be deleted*/
> +	p = kmemdup(s->data, s->size, GFP_KERNEL);
> +	if (!p)
> +		return -ENOMEM;
> +
> +	t->data = p;
> +	t->size = s->size;
> +
> +	dir = debugfs_create_blob(name, 0444, parent, t);
> +	if (IS_ERR(dir)) {
> +		kfree(p);
> +		memset(t, 0, sizeof(*t));
> +		return PTR_ERR(dir);
> +	}
> +
> +	return 0;
> +}
> +
> +/**
> + * r535_gsp_retain_logging - copy logging buffers to new debugfs root
> + * @gsp: gsp pointer
> + *
> + * If keep_gsp_logging is enabled, then we want to preserve the GSP-RM logging
> + * buffers and their debugfs entries, but all those objects would normally
> + * deleted if GSP-RM fails to load.
> + *
> + * To preserve the logging buffers, we need to:
> + *
> + * 1) Allocate new buffers and copy the logs into them, so that the original
> + * DMA buffers can be released.
> + *
> + * 2) Preserve the directories.  We don't need to save single dentries because
> + * we're going to delete the parent when the
> + *
> + * If anything fails in this process, then all the dentries need to be
> + * deleted.  We don't need to deallocate the original logging buffers because
> + * the caller will do that regardless.
> + */
> +static void r535_gsp_retain_logging(struct nvkm_gsp *gsp)
> +{
> +	struct device *dev = gsp->subdev.device->dev;
> +	struct dentry *dir;
> +	struct r535_gsp_log *log = NULL;
> +	int ret;
> +
> +	if (!keep_gsp_logging || !gsp->dir) {
> +		/* Nothing to do */
> +		goto exit;
> +	}
> +
> +	/* Check to make sure at least one buffer has data. */
> +	if (is_empty(&gsp->blob_init) && is_empty(&gsp->blob_intr) &&
> +	    is_empty(&gsp->blob_rm) && is_empty(&gsp->blob_rm)) {
> +		nvkm_warn(&gsp->subdev, "all logging buffers are empty\n");
> +		goto exit;
> +	}
> +
> +	log = kzalloc(sizeof(*log), GFP_KERNEL);
> +	if (!log) {
> +		debugfs_remove(dir);
> +		goto error;
> +	}
> +
> +	/*
> +	 * Since the nvkm_gsp object is going away, the debugfs_blob_wrapper
> +	 * objects are also being deleted, which means the dentries will no
> +	 * longer be valid.  Delete the existing entries so that we can create
> +	 * new ones with the same name.
> +	 */
> +	debugfs_remove(gsp->dir_init);
> +	debugfs_remove(gsp->dir_intr);
> +	debugfs_remove(gsp->dir_rm);
> +	debugfs_remove(gsp->dir_pmu);
> +
> +	ret = r535_gsp_copy_log(gsp->dir, "loginit", &gsp->blob_init, &log->blob_init);
> +	if (ret)
> +		goto error;
> +
> +	ret = r535_gsp_copy_log(gsp->dir, "logintr", &gsp->blob_intr, &log->blob_intr);
> +	if (ret)
> +		goto error;
> +
> +	ret = r535_gsp_copy_log(gsp->dir, "logrm", &gsp->blob_rm, &log->blob_rm);
> +	if (ret)
> +		goto error;
> +
> +	ret = r535_gsp_copy_log(gsp->dir, "logpmu", &gsp->blob_pmu, &log->blob_pmu);
> +	if (ret)
> +		goto error;
> +
> +	/* The nvkm_gsp object is going away, so save the dentry */
> +	log->debugfs_logging_dir = gsp->dir;
> +
> +	log->log.shutdown = r535_debugfs_shutdown;
> +	list_add(&log->log.entry, &gsp_logs.head);
> +
> +	nvkm_warn(&gsp->subdev,
> +		  "logging buffers migrated to /sys/kernel/debug/nouveau/%s\n",
> +		  dev_name(dev));
> +
> +	return;
> +
> +error:
> +	nvkm_warn(&gsp->subdev, "failed to migrate logging buffers\n");
> +
> +exit:
> +	mutex_lock(&root.mutex);
> +	debugfs_remove(gsp->dir);
> +	kref_put(&root.kref, release_root_dentry);
> +	mutex_unlock(&root.mutex);
> +
> +	if (log) {
> +		kfree(log->blob_init.data);
> +		kfree(log->blob_intr.data);
> +		kfree(log->blob_rm.data);
> +		kfree(log->blob_pmu.data);
> +		kfree(log);
> +	}
> +}
> +
> +#endif
> +
> +/**
> + * r535_gsp_libos_debugfs_fini - cleanup/retain log buffers on shutdown
> + * @gsp: gsp pointer
> + *
> + * If the log buffers are exposed via debugfs, the data for those entries
> + * needs to be cleaned up when the GSP device shuts down.
> + */
> +static void
> +r535_gsp_libos_debugfs_fini(struct nvkm_gsp __maybe_unused *gsp)
> +{
> +#ifdef CONFIG_DEBUG_FS
> +	r535_gsp_retain_logging(gsp);
> +
> +	/*
> +	 * Unlike the other buffers, the PMU blob is a kmalloc'd buffer that
> +	 * exists only if the debugfs entries were created.
> +	 */
> +	kfree(gsp->blob_pmu.data);
> +	gsp->blob_pmu.data = NULL;
> +#endif
> +}
> +
>  void
>  r535_gsp_dtor(struct nvkm_gsp *gsp)
>  {
> @@ -2528,6 +3019,9 @@ r535_gsp_dtor(struct nvkm_gsp *gsp)
>  	nvkm_gsp_mem_dtor(&gsp->rmargs);
>  	nvkm_gsp_mem_dtor(&gsp->wpr_meta);
>  	nvkm_gsp_mem_dtor(&gsp->shm.mem);
> +
> +	r535_gsp_libos_debugfs_fini(gsp);
> +
>  	nvkm_gsp_mem_dtor(&gsp->loginit);
>  	nvkm_gsp_mem_dtor(&gsp->logintr);
>  	nvkm_gsp_mem_dtor(&gsp->logrm);
> -- 
> 2.34.1
> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 2/2] [v8] drm/nouveau: expose GSP-RM logging buffers via debugfs
  2024-10-03 21:48   ` Danilo Krummrich
@ 2024-10-29 19:50     ` Timur Tabi
  2024-10-29 23:40       ` Danilo Krummrich
  0 siblings, 1 reply; 5+ messages in thread
From: Timur Tabi @ 2024-10-29 19:50 UTC (permalink / raw)
  To: dakr@kernel.org
  Cc: airlied@redhat.com, nouveau@lists.freedesktop.org, Ben Skeggs

On Thu, 2024-10-03 at 23:48 +0200, Danilo Krummrich wrote:
> > +#ifdef CONFIG_DEBUG_FS
> > +	/*
> > +	 * Logging buffers in debugfs. The wrapper objects need to remain
> > +	 * in memory until the dentry is deleted.
> > +	 */
> > +	struct dentry *dir;		/* Parent dentry */
> > +	struct dentry *dir_init;
> > +	struct dentry *dir_rm;
> > +	struct dentry *dir_intr;
> > +	struct dentry *dir_pmu;
> 
> I think `dir` is confusing, maybe just `dent_*`? Or maybe just wrap all those in
> a `struct { ... } debugfs;` and just name them `init`, `rm`, etc.?

Ok, but I'm pretty sure this is like the fifth time I've moved these fields
around because you keep telling me to put them somewhere else.

> > +/*
> > + * The debugfs root for all devices.  Normally we'd use /sys/kernel/debug/dri.,
> > + * but that path may not exist when we need it.  So create a new root
> > + * /sys/kernel/debug/nouveau/.
> > + *
> > + * We take a reference every time a dentry under the root is created.  When
> > + * the last reference is released, the root is deleted.
> > + */
> > +static struct {
> > +	struct mutex mutex; /* Protects dentry */
> > +	struct dentry *dentry;
> > +	struct kref kref;
> > +} root = {
> > +	.mutex = __MUTEX_INITIALIZER(root.mutex),
> > +	.kref = KREF_INIT(0),
> > +	.dentry = NULL,
> > +};
> > +
> > +static void release_root_dentry(struct kref *ref)
> > +{
> > +	mutex_lock(&root.mutex);
> > +	debugfs_remove(root.dentry);
> > +	root.dentry = NULL;
> > +	mutex_unlock(&root.mutex);
> > +}
> 
> I think all this should go into module_init() and module_exit(), then you don't
> need the mutex and all the reference counts.

Sorry, I don't see how I can just move "all this" to module_init and exit. 
The point is to keep the parent dentry around until the last GPU has shut
down in r535_debugfs_shutdown().  

Please tell me what you think module_init() and module_exit() will look
like.

> > +/**
> > + * create_debufgs - create a blob debugfs entry
> > + * @name: filename
> > + * @parent: parent
> > + * @blob: blob
> > + *
> > + * Creates a debugfs entry for a logging buffer with the name 'name'.
> > + */
> > +static struct dentry *create_debugfs(struct nvkm_gsp *gsp, const char *name,
> > +				     struct debugfs_blob_wrapper *blob)
> > +{
> > +	struct dentry *dir;
> 
> I think `dir` is confusing, what about `dent` or `entry`?

Here's a count of the most popular names for this type:

     10 	struct dentry *ddir;
     18 	struct dentry *d;
     21 	struct dentry *debugfs_root;
     31 	struct dentry *dbgfs_dir;
     39 	struct dentry *debugfs_dir;
     50 	struct dentry *dentry;
     55 	struct dentry *debugfs;
     55 	struct dentry *dir;
     64 	struct dentry *root;

As you can see, 'dir' is the second most popular name.  So I think it's
fine.

Besides, couldn't you have make this suggestion during one of the previous 7
versions of this patch?  I'm never going to get this patch finished if you
keep asking for cosmetic changes.

> > +	dir = debugfs_create_blob(name, 0444, gsp->dir, blob);
> > +	if (IS_ERR(dir)) {
> > +		nvkm_error(&gsp->subdev,
> > +			   "failed to create %s debugfs entry\n", name);
> > +		return NULL;
> > +	}
> > +
> > +	/*
> > +	 * For some reason, debugfs_create_blob doesn't set the size of the
> > +	 * dentry, so do that here.
> > +	 */
> > +	i_size_write(d_inode(dir), blob->size);
> 
> I think debugfs entries typically don't need this. Do we need it?

Yes.  I submitted a patch to debugfs earlier this year that would fix it for
all debugfs blobs, but it was rejected because I was asked to fix all of
debugfs itself, which I wasn't willing to do.

https://www.spinics.net/lists/linux-fsdevel/msg262843.html

> > +	gsp->dir_init = create_debugfs(gsp, "loginit", &gsp->blob_init);
> 
> Here you use your helper, but for...
> 
> > +	if (!gsp->dir_init)
> > +		goto error;
> > +
> > +	gsp->dir_intr = debugfs_create_blob("logintr", 0444, gsp->dir, &gsp->blob_intr);
> > +	if (IS_ERR(gsp->dir_intr)) {
> > +		nvkm_error(&gsp->subdev, "failed to create logintr debugfs entry\n");
> > +		goto error;
> > +	}
> > +
> > +	gsp->dir_rm = debugfs_create_blob("logrm", 0444, gsp->dir, &gsp->blob_rm);
> > +	if (IS_ERR(gsp->dir_rm)) {
> > +		nvkm_error(&gsp->subdev, "failed to create logrm debugfs entry\n");
> > +		goto error;
> > +	}
> > +
> > +	/*
> > +	 * Since the PMU buffer is copied from an RPC, it doesn't need to be
> > +	 * a DMA buffer.
> > +	 */
> > +	gsp->blob_pmu.size = GSP_PAGE_SIZE;
> > +	gsp->blob_pmu.data = kzalloc(gsp->blob_pmu.size, GFP_KERNEL);
> > +	if (!gsp->blob_pmu.data)
> > +		goto error;
> > +
> > +	gsp->dir_pmu = debugfs_create_blob("logpmu", 0444, gsp->dir, &gsp->blob_pmu);
> > +	if (IS_ERR(gsp->dir_pmu)) {
> > +		nvkm_error(&gsp->subdev, "failed to create logpmu debugfs entry\n");
> > +		kfree(gsp->blob_pmu.data);
> > +		goto error;
> > +	}
> > +
> > +	i_size_write(d_inode(gsp->dir_init), gsp->blob_init.size);
> > +	i_size_write(d_inode(gsp->dir_intr), gsp->blob_intr.size);
> > +	i_size_write(d_inode(gsp->dir_rm), gsp->blob_rm.size);
> > +	i_size_write(d_inode(gsp->dir_pmu), gsp->blob_pmu.size);
> 
> ...all those, it seems you forgot to switch to your helper.

Oops.  I think I've been working on this patch too long.

I will post a v10 with struct { ... } debugfs and create_debugfs fixes.  But
I will need your help with the module_init/exit change if you still want
that.


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 2/2] [v8] drm/nouveau: expose GSP-RM logging buffers via debugfs
  2024-10-29 19:50     ` Timur Tabi
@ 2024-10-29 23:40       ` Danilo Krummrich
  0 siblings, 0 replies; 5+ messages in thread
From: Danilo Krummrich @ 2024-10-29 23:40 UTC (permalink / raw)
  To: Timur Tabi; +Cc: airlied@redhat.com, nouveau@lists.freedesktop.org, Ben Skeggs

On Tue, Oct 29, 2024 at 07:50:06PM +0000, Timur Tabi wrote:
> On Thu, 2024-10-03 at 23:48 +0200, Danilo Krummrich wrote:
> > > +#ifdef CONFIG_DEBUG_FS
> > > +	/*
> > > +	 * Logging buffers in debugfs. The wrapper objects need to remain
> > > +	 * in memory until the dentry is deleted.
> > > +	 */
> > > +	struct dentry *dir;		/* Parent dentry */
> > > +	struct dentry *dir_init;
> > > +	struct dentry *dir_rm;
> > > +	struct dentry *dir_intr;
> > > +	struct dentry *dir_pmu;
> > 
> > I think `dir` is confusing, maybe just `dent_*`? Or maybe just wrap all those in
> > a `struct { ... } debugfs;` and just name them `init`, `rm`, etc.?
> 
> Ok, but I'm pretty sure this is like the fifth time I've moved these fields
> around because you keep telling me to put them somewhere else.

You added those fields in *this* version, so I don't see how that makes any
sense.

If you don't want to rename them -- fine too. It was a suggestion, and since
you'll need a v9 anyways it doesn't seem like too much to ask for.

> 
> > > +/*
> > > + * The debugfs root for all devices.  Normally we'd use /sys/kernel/debug/dri.,
> > > + * but that path may not exist when we need it.  So create a new root
> > > + * /sys/kernel/debug/nouveau/.
> > > + *
> > > + * We take a reference every time a dentry under the root is created.  When
> > > + * the last reference is released, the root is deleted.
> > > + */
> > > +static struct {
> > > +	struct mutex mutex; /* Protects dentry */
> > > +	struct dentry *dentry;
> > > +	struct kref kref;
> > > +} root = {
> > > +	.mutex = __MUTEX_INITIALIZER(root.mutex),
> > > +	.kref = KREF_INIT(0),
> > > +	.dentry = NULL,
> > > +};
> > > +
> > > +static void release_root_dentry(struct kref *ref)
> > > +{
> > > +	mutex_lock(&root.mutex);
> > > +	debugfs_remove(root.dentry);
> > > +	root.dentry = NULL;
> > > +	mutex_unlock(&root.mutex);
> > > +}
> > 
> > I think all this should go into module_init() and module_exit(), then you don't
> > need the mutex and all the reference counts.
> 
> Sorry, I don't see how I can just move "all this" to module_init and exit. 
> The point is to keep the parent dentry around until the last GPU has shut
> down in r535_debugfs_shutdown().  
> 
> Please tell me what you think module_init() and module_exit() will look
> like.

You can create the root debugfs entry in module_init() and remove it in
module_exit() and make it available as a global. Then you don't need any mutex
and reference count.

Please let me know if anything is still unclear or if I miss anything.

> 
> > > +/**
> > > + * create_debufgs - create a blob debugfs entry
> > > + * @name: filename
> > > + * @parent: parent
> > > + * @blob: blob
> > > + *
> > > + * Creates a debugfs entry for a logging buffer with the name 'name'.
> > > + */
> > > +static struct dentry *create_debugfs(struct nvkm_gsp *gsp, const char *name,
> > > +				     struct debugfs_blob_wrapper *blob)
> > > +{
> > > +	struct dentry *dir;
> > 
> > I think `dir` is confusing, what about `dent` or `entry`?
> 
> Here's a count of the most popular names for this type:
> 
>      10 	struct dentry *ddir;
>      18 	struct dentry *d;
>      21 	struct dentry *debugfs_root;
>      31 	struct dentry *dbgfs_dir;
>      39 	struct dentry *debugfs_dir;
>      50 	struct dentry *dentry;
>      55 	struct dentry *debugfs;
>      55 	struct dentry *dir;
>      64 	struct dentry *root;
> 
> As you can see, 'dir' is the second most popular name.  So I think it's
> fine.

Being the second most popular name doesn't make it a good name. I wonder how
often people use "password" as their password...

> 
> Besides, couldn't you have make this suggestion during one of the previous 7
> versions of this patch?  I'm never going to get this patch finished if you
> keep asking for cosmetic changes.

Agreed, however, I think it got my attention, since you added all those

+	struct dentry *dir;		/* Parent dentry */
+	struct dentry *dir_init;
+	struct dentry *dir_rm;
+	struct dentry *dir_intr;
+	struct dentry *dir_pmu;

to struct nvkm_gsp in *this* version.

Feel free to keep it, but as mentioned above, you need a v9 anyways, so it also
shouldn't be a big deal to change those.

Please also note that none of the revisions you have been sending were motivated
by "cosmetic changes".

> 
> > > +	dir = debugfs_create_blob(name, 0444, gsp->dir, blob);
> > > +	if (IS_ERR(dir)) {
> > > +		nvkm_error(&gsp->subdev,
> > > +			   "failed to create %s debugfs entry\n", name);
> > > +		return NULL;
> > > +	}
> > > +
> > > +	/*
> > > +	 * For some reason, debugfs_create_blob doesn't set the size of the
> > > +	 * dentry, so do that here.
> > > +	 */
> > > +	i_size_write(d_inode(dir), blob->size);
> > 
> > I think debugfs entries typically don't need this. Do we need it?
> 
> Yes.  I submitted a patch to debugfs earlier this year that would fix it for
> all debugfs blobs, but it was rejected because I was asked to fix all of
> debugfs itself, which I wasn't willing to do.
> 
> https://www.spinics.net/lists/linux-fsdevel/msg262843.html

I see...

Can you please add a very brief comment and this [1] link?

[1] https://lore.kernel.org/r/linux-fsdevel/20240207200619.3354549-1-ttabi@nvidia.com/

> 
> > > +	gsp->dir_init = create_debugfs(gsp, "loginit", &gsp->blob_init);
> > 
> > Here you use your helper, but for...
> > 
> > > +	if (!gsp->dir_init)
> > > +		goto error;
> > > +
> > > +	gsp->dir_intr = debugfs_create_blob("logintr", 0444, gsp->dir, &gsp->blob_intr);
> > > +	if (IS_ERR(gsp->dir_intr)) {
> > > +		nvkm_error(&gsp->subdev, "failed to create logintr debugfs entry\n");
> > > +		goto error;
> > > +	}
> > > +
> > > +	gsp->dir_rm = debugfs_create_blob("logrm", 0444, gsp->dir, &gsp->blob_rm);
> > > +	if (IS_ERR(gsp->dir_rm)) {
> > > +		nvkm_error(&gsp->subdev, "failed to create logrm debugfs entry\n");
> > > +		goto error;
> > > +	}
> > > +
> > > +	/*
> > > +	 * Since the PMU buffer is copied from an RPC, it doesn't need to be
> > > +	 * a DMA buffer.
> > > +	 */
> > > +	gsp->blob_pmu.size = GSP_PAGE_SIZE;
> > > +	gsp->blob_pmu.data = kzalloc(gsp->blob_pmu.size, GFP_KERNEL);
> > > +	if (!gsp->blob_pmu.data)
> > > +		goto error;
> > > +
> > > +	gsp->dir_pmu = debugfs_create_blob("logpmu", 0444, gsp->dir, &gsp->blob_pmu);
> > > +	if (IS_ERR(gsp->dir_pmu)) {
> > > +		nvkm_error(&gsp->subdev, "failed to create logpmu debugfs entry\n");
> > > +		kfree(gsp->blob_pmu.data);
> > > +		goto error;
> > > +	}
> > > +
> > > +	i_size_write(d_inode(gsp->dir_init), gsp->blob_init.size);
> > > +	i_size_write(d_inode(gsp->dir_intr), gsp->blob_intr.size);
> > > +	i_size_write(d_inode(gsp->dir_rm), gsp->blob_rm.size);
> > > +	i_size_write(d_inode(gsp->dir_pmu), gsp->blob_pmu.size);
> > 
> > ...all those, it seems you forgot to switch to your helper.
> 
> Oops.  I think I've been working on this patch too long.
> 
> I will post a v10 with struct { ... } debugfs and create_debugfs fixes.  But
> I will need your help with the module_init/exit change if you still want
> that.
> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2024-10-29 23:40 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-09-10 21:56 [PATCH 1/2] [v2] drm/nouveau: retain device pointer in nvkm_gsp_mem object Timur Tabi
2024-09-10 21:56 ` [PATCH 2/2] [v8] drm/nouveau: expose GSP-RM logging buffers via debugfs Timur Tabi
2024-10-03 21:48   ` Danilo Krummrich
2024-10-29 19:50     ` Timur Tabi
2024-10-29 23:40       ` Danilo Krummrich

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.