* [PATCH 01/17] drm/i915: Add ability for tracking buffer objects per client
2023-07-12 11:45 [RFC v5 00/17] DRM cgroup controller with scheduling control and memory stats Tvrtko Ursulin
@ 2023-07-12 11:45 ` Tvrtko Ursulin
2023-07-12 11:45 ` [PATCH 02/17] drm/i915: Record which client owns a VM Tvrtko Ursulin
` (16 subsequent siblings)
17 siblings, 0 replies; 39+ messages in thread
From: Tvrtko Ursulin @ 2023-07-12 11:45 UTC (permalink / raw)
To: Intel-gfx, dri-devel
Cc: Rob Clark, Kenny.Ho, Daniel Vetter, Johannes Weiner, linux-kernel,
Stéphane Marchesin, Christian König, Zefan Li,
Dave Airlie, Tejun Heo, cgroups, T . J . Mercier
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
In order to show per client memory usage lets add some infrastructure
which enables tracking buffer objects owned by clients.
We add a per client list protected by a new per client lock and to support
delayed destruction (post client exit) we make tracked objects hold
references to the owning client.
Also, object memory region teardown is moved to the existing RCU free
callback to allow safe dereference from the fdinfo RCU read section.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
drivers/gpu/drm/i915/gem/i915_gem_object.c | 13 +++++--
.../gpu/drm/i915/gem/i915_gem_object_types.h | 12 +++++++
drivers/gpu/drm/i915/i915_drm_client.c | 36 +++++++++++++++++++
drivers/gpu/drm/i915/i915_drm_client.h | 32 +++++++++++++++++
4 files changed, 90 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 97ac6fb37958..3dc4fbb67d2b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -105,6 +105,10 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
INIT_LIST_HEAD(&obj->mm.link);
+#ifdef CONFIG_PROC_FS
+ INIT_LIST_HEAD(&obj->client_link);
+#endif
+
INIT_LIST_HEAD(&obj->lut_list);
spin_lock_init(&obj->lut_lock);
@@ -292,6 +296,10 @@ void __i915_gem_free_object_rcu(struct rcu_head *head)
container_of(head, typeof(*obj), rcu);
struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ /* We need to keep this alive for RCU read access from fdinfo. */
+ if (obj->mm.n_placements > 1)
+ kfree(obj->mm.placements);
+
i915_gem_object_free(obj);
GEM_BUG_ON(!atomic_read(&i915->mm.free_count));
@@ -388,9 +396,6 @@ void __i915_gem_free_object(struct drm_i915_gem_object *obj)
if (obj->ops->release)
obj->ops->release(obj);
- if (obj->mm.n_placements > 1)
- kfree(obj->mm.placements);
-
if (obj->shares_resv_from)
i915_vm_resv_put(obj->shares_resv_from);
@@ -441,6 +446,8 @@ static void i915_gem_free_object(struct drm_gem_object *gem_obj)
GEM_BUG_ON(i915_gem_object_is_framebuffer(obj));
+ i915_drm_client_remove_object(obj);
+
/*
* Before we free the object, make sure any pure RCU-only
* read-side critical sections are complete, e.g.
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index e72c57716bee..8de2b91b3edf 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -300,6 +300,18 @@ struct drm_i915_gem_object {
*/
struct i915_address_space *shares_resv_from;
+#ifdef CONFIG_PROC_FS
+ /**
+ * @client: @i915_drm_client which created the object
+ */
+ struct i915_drm_client *client;
+
+ /**
+ * @client_link: Link into @i915_drm_client.objects_list
+ */
+ struct list_head client_link;
+#endif
+
union {
struct rcu_head rcu;
struct llist_node freed;
diff --git a/drivers/gpu/drm/i915/i915_drm_client.c b/drivers/gpu/drm/i915/i915_drm_client.c
index 2a44b3876cb5..2e5e69edc0f9 100644
--- a/drivers/gpu/drm/i915/i915_drm_client.c
+++ b/drivers/gpu/drm/i915/i915_drm_client.c
@@ -28,6 +28,10 @@ struct i915_drm_client *i915_drm_client_alloc(void)
kref_init(&client->kref);
spin_lock_init(&client->ctx_lock);
INIT_LIST_HEAD(&client->ctx_list);
+#ifdef CONFIG_PROC_FS
+ spin_lock_init(&client->objects_lock);
+ INIT_LIST_HEAD(&client->objects_list);
+#endif
return client;
}
@@ -108,4 +112,36 @@ void i915_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file)
for (i = 0; i < ARRAY_SIZE(uabi_class_names); i++)
show_client_class(p, i915, file_priv->client, i);
}
+
+void i915_drm_client_add_object(struct i915_drm_client *client,
+ struct drm_i915_gem_object *obj)
+{
+ unsigned long flags;
+
+ GEM_WARN_ON(obj->client);
+ GEM_WARN_ON(!list_empty(&obj->client_link));
+
+ spin_lock_irqsave(&client->objects_lock, flags);
+ obj->client = i915_drm_client_get(client);
+ list_add_tail_rcu(&obj->client_link, &client->objects_list);
+ spin_unlock_irqrestore(&client->objects_lock, flags);
+}
+
+bool i915_drm_client_remove_object(struct drm_i915_gem_object *obj)
+{
+ struct i915_drm_client *client = fetch_and_zero(&obj->client);
+ unsigned long flags;
+
+ /* Object may not be associated with a client. */
+ if (!client)
+ return false;
+
+ spin_lock_irqsave(&client->objects_lock, flags);
+ list_del_rcu(&obj->client_link);
+ spin_unlock_irqrestore(&client->objects_lock, flags);
+
+ i915_drm_client_put(client);
+
+ return true;
+}
#endif
diff --git a/drivers/gpu/drm/i915/i915_drm_client.h b/drivers/gpu/drm/i915/i915_drm_client.h
index 67816c912bca..5f58fdf7dcb8 100644
--- a/drivers/gpu/drm/i915/i915_drm_client.h
+++ b/drivers/gpu/drm/i915/i915_drm_client.h
@@ -12,6 +12,9 @@
#include <uapi/drm/i915_drm.h>
+#include "i915_file_private.h"
+#include "gem/i915_gem_object_types.h"
+
#define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_COMPUTE
struct drm_file;
@@ -25,6 +28,20 @@ struct i915_drm_client {
spinlock_t ctx_lock; /* For add/remove from ctx_list. */
struct list_head ctx_list; /* List of contexts belonging to client. */
+#ifdef CONFIG_PROC_FS
+ /**
+ * @objects_lock: lock protecting @objects_list
+ */
+ spinlock_t objects_lock;
+
+ /**
+ * @objects_list: list of objects created by this client
+ *
+ * Protected by @objects_lock.
+ */
+ struct list_head objects_list;
+#endif
+
/**
* @past_runtime: Accumulation of pphwsp runtimes from closed contexts.
*/
@@ -49,4 +66,20 @@ struct i915_drm_client *i915_drm_client_alloc(void);
void i915_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file);
+#ifdef CONFIG_PROC_FS
+void i915_drm_client_add_object(struct i915_drm_client *client,
+ struct drm_i915_gem_object *obj);
+bool i915_drm_client_remove_object(struct drm_i915_gem_object *obj);
+#else
+static inline void i915_drm_client_add_object(struct i915_drm_client *client,
+ struct drm_i915_gem_object *obj)
+{
+}
+
+static inline bool i915_drm_client_remove_object(struct drm_i915_gem_object *obj)
+{
+ return false;
+}
+#endif
+
+
#endif /* !__I915_DRM_CLIENT_H__ */
--
2.39.2
^ permalink raw reply related [flat|nested] 39+ messages in thread* [PATCH 02/17] drm/i915: Record which client owns a VM
2023-07-12 11:45 [RFC v5 00/17] DRM cgroup controller with scheduling control and memory stats Tvrtko Ursulin
2023-07-12 11:45 ` [PATCH 01/17] drm/i915: Add ability for tracking buffer objects per client Tvrtko Ursulin
@ 2023-07-12 11:45 ` Tvrtko Ursulin
2023-07-12 11:45 ` [PATCH 03/17] drm/i915: Track page table backing store usage Tvrtko Ursulin
` (15 subsequent siblings)
17 siblings, 0 replies; 39+ messages in thread
From: Tvrtko Ursulin @ 2023-07-12 11:45 UTC (permalink / raw)
To: Intel-gfx, dri-devel
Cc: Rob Clark, Kenny.Ho, Daniel Vetter, Johannes Weiner, linux-kernel,
Stéphane Marchesin, Christian König, Zefan Li,
Dave Airlie, Tejun Heo, cgroups, T . J . Mercier
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
To enable accounting of indirect client memory usage (such as page tables)
in the following patch, lets start recording the creator of each PPGTT.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
---
drivers/gpu/drm/i915/gem/i915_gem_context.c | 11 ++++++++---
drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 3 +++
drivers/gpu/drm/i915/gem/selftests/mock_context.c | 4 ++--
drivers/gpu/drm/i915/gt/intel_gtt.h | 1 +
4 files changed, 14 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 9a9ff84c90d7..35cf6608180e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -279,7 +279,8 @@ static int proto_context_set_protected(struct drm_i915_private *i915,
}
static struct i915_gem_proto_context *
-proto_context_create(struct drm_i915_private *i915, unsigned int flags)
+proto_context_create(struct drm_i915_file_private *fpriv,
+ struct drm_i915_private *i915, unsigned int flags)
{
struct i915_gem_proto_context *pc, *err;
@@ -287,6 +288,7 @@ proto_context_create(struct drm_i915_private *i915, unsigned int flags)
if (!pc)
return ERR_PTR(-ENOMEM);
+ pc->fpriv = fpriv;
pc->num_user_engines = -1;
pc->user_engines = NULL;
pc->user_flags = BIT(UCONTEXT_BANNABLE) |
@@ -1621,6 +1623,7 @@ i915_gem_create_context(struct drm_i915_private *i915,
err = PTR_ERR(ppgtt);
goto err_ctx;
}
+ ppgtt->vm.fpriv = pc->fpriv;
vm = &ppgtt->vm;
}
if (vm)
@@ -1740,7 +1743,7 @@ int i915_gem_context_open(struct drm_i915_private *i915,
/* 0 reserved for invalid/unassigned ppgtt */
xa_init_flags(&file_priv->vm_xa, XA_FLAGS_ALLOC1);
- pc = proto_context_create(i915, 0);
+ pc = proto_context_create(file_priv, i915, 0);
if (IS_ERR(pc)) {
err = PTR_ERR(pc);
goto err;
@@ -1822,6 +1825,7 @@ int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data,
GEM_BUG_ON(id == 0); /* reserved for invalid/unassigned ppgtt */
args->vm_id = id;
+ ppgtt->vm.fpriv = file_priv;
return 0;
err_put:
@@ -2284,7 +2288,8 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
return -EIO;
}
- ext_data.pc = proto_context_create(i915, args->flags);
+ ext_data.pc = proto_context_create(file->driver_priv, i915,
+ args->flags);
if (IS_ERR(ext_data.pc))
return PTR_ERR(ext_data.pc);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index cb78214a7dcd..c573c067779f 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -188,6 +188,9 @@ struct i915_gem_proto_engine {
* CONTEXT_CREATE_SET_PARAM during GEM_CONTEXT_CREATE.
*/
struct i915_gem_proto_context {
+ /** @fpriv: Client which creates the context */
+ struct drm_i915_file_private *fpriv;
+
/** @vm: See &i915_gem_context.vm */
struct i915_address_space *vm;
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
index 8ac6726ec16b..125584ada282 100644
--- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
@@ -83,7 +83,7 @@ live_context(struct drm_i915_private *i915, struct file *file)
int err;
u32 id;
- pc = proto_context_create(i915, 0);
+ pc = proto_context_create(fpriv, i915, 0);
if (IS_ERR(pc))
return ERR_CAST(pc);
@@ -152,7 +152,7 @@ kernel_context(struct drm_i915_private *i915,
struct i915_gem_context *ctx;
struct i915_gem_proto_context *pc;
- pc = proto_context_create(i915, 0);
+ pc = proto_context_create(NULL, i915, 0);
if (IS_ERR(pc))
return ERR_CAST(pc);
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index 4d6296cdbcfd..7192a534a654 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -248,6 +248,7 @@ struct i915_address_space {
struct drm_mm mm;
struct intel_gt *gt;
struct drm_i915_private *i915;
+ struct drm_i915_file_private *fpriv;
struct device *dma;
u64 total; /* size addr space maps (ex. 2GB for ggtt) */
u64 reserved; /* size addr space reserved */
--
2.39.2
^ permalink raw reply related [flat|nested] 39+ messages in thread* [PATCH 03/17] drm/i915: Track page table backing store usage
2023-07-12 11:45 [RFC v5 00/17] DRM cgroup controller with scheduling control and memory stats Tvrtko Ursulin
2023-07-12 11:45 ` [PATCH 01/17] drm/i915: Add ability for tracking buffer objects per client Tvrtko Ursulin
2023-07-12 11:45 ` [PATCH 02/17] drm/i915: Record which client owns a VM Tvrtko Ursulin
@ 2023-07-12 11:45 ` Tvrtko Ursulin
2023-07-12 11:45 ` [PATCH 04/17] drm/i915: Account ring buffer and context state storage Tvrtko Ursulin
` (14 subsequent siblings)
17 siblings, 0 replies; 39+ messages in thread
From: Tvrtko Ursulin @ 2023-07-12 11:45 UTC (permalink / raw)
To: Intel-gfx, dri-devel
Cc: Rob Clark, Kenny.Ho, Daniel Vetter, Johannes Weiner, linux-kernel,
Stéphane Marchesin, Christian König, Zefan Li,
Dave Airlie, Tejun Heo, cgroups, T . J . Mercier
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Account page table backing store against the owning client memory usage
stats.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
---
drivers/gpu/drm/i915/gt/intel_gtt.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
index 731d9f2bbc56..065099362a98 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -58,6 +58,9 @@ struct drm_i915_gem_object *alloc_pt_lmem(struct i915_address_space *vm, int sz)
if (!IS_ERR(obj)) {
obj->base.resv = i915_vm_resv_get(vm);
obj->shares_resv_from = vm;
+
+ if (vm->fpriv)
+ i915_drm_client_add_object(vm->fpriv->client, obj);
}
return obj;
@@ -79,6 +82,9 @@ struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz)
if (!IS_ERR(obj)) {
obj->base.resv = i915_vm_resv_get(vm);
obj->shares_resv_from = vm;
+
+ if (vm->fpriv)
+ i915_drm_client_add_object(vm->fpriv->client, obj);
}
return obj;
--
2.39.2
^ permalink raw reply related [flat|nested] 39+ messages in thread* [PATCH 04/17] drm/i915: Account ring buffer and context state storage
2023-07-12 11:45 [RFC v5 00/17] DRM cgroup controller with scheduling control and memory stats Tvrtko Ursulin
` (2 preceding siblings ...)
2023-07-12 11:45 ` [PATCH 03/17] drm/i915: Track page table backing store usage Tvrtko Ursulin
@ 2023-07-12 11:45 ` Tvrtko Ursulin
2023-07-12 11:45 ` [PATCH 05/17] drm/i915: Implement fdinfo memory stats printing Tvrtko Ursulin
` (13 subsequent siblings)
17 siblings, 0 replies; 39+ messages in thread
From: Tvrtko Ursulin @ 2023-07-12 11:45 UTC (permalink / raw)
To: Intel-gfx, dri-devel
Cc: Rob Clark, Kenny.Ho, Daniel Vetter, Johannes Weiner, linux-kernel,
Stéphane Marchesin, Christian König, Zefan Li,
Dave Airlie, Tejun Heo, cgroups, T . J . Mercier
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Account ring buffers and logical context space against the owning client
memory usage stats.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
---
drivers/gpu/drm/i915/gt/intel_context.c | 14 ++++++++++++++
drivers/gpu/drm/i915/i915_drm_client.c | 10 ++++++++++
drivers/gpu/drm/i915/i915_drm_client.h | 9 +++++++++
3 files changed, 33 insertions(+)
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index a53b26178f0a..a2f1245741bb 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -6,6 +6,7 @@
#include "gem/i915_gem_context.h"
#include "gem/i915_gem_pm.h"
+#include "i915_drm_client.h"
#include "i915_drv.h"
#include "i915_trace.h"
@@ -50,6 +51,7 @@ intel_context_create(struct intel_engine_cs *engine)
int intel_context_alloc_state(struct intel_context *ce)
{
+ struct i915_gem_context *ctx;
int err = 0;
if (mutex_lock_interruptible(&ce->pin_mutex))
@@ -66,6 +68,18 @@ int intel_context_alloc_state(struct intel_context *ce)
goto unlock;
set_bit(CONTEXT_ALLOC_BIT, &ce->flags);
+
+ rcu_read_lock();
+ ctx = rcu_dereference(ce->gem_context);
+ if (ctx && !kref_get_unless_zero(&ctx->ref))
+ ctx = NULL;
+ rcu_read_unlock();
+ if (ctx) {
+ if (ctx->client)
+ i915_drm_client_add_context_objects(ctx->client,
+ ce);
+ i915_gem_context_put(ctx);
+ }
}
unlock:
diff --git a/drivers/gpu/drm/i915/i915_drm_client.c b/drivers/gpu/drm/i915/i915_drm_client.c
index 2e5e69edc0f9..a61356012df8 100644
--- a/drivers/gpu/drm/i915/i915_drm_client.c
+++ b/drivers/gpu/drm/i915/i915_drm_client.c
@@ -144,4 +144,14 @@ bool i915_drm_client_remove_object(struct drm_i915_gem_object *obj)
return true;
}
+
+void i915_drm_client_add_context_objects(struct i915_drm_client *client,
+ struct intel_context *ce)
+{
+ if (ce->state)
+ i915_drm_client_add_object(client, ce->state->obj);
+
+ if (ce->ring != ce->engine->legacy.ring && ce->ring->vma)
+ i915_drm_client_add_object(client, ce->ring->vma->obj);
+}
#endif
diff --git a/drivers/gpu/drm/i915/i915_drm_client.h b/drivers/gpu/drm/i915/i915_drm_client.h
index 5f58fdf7dcb8..69cedfcd3d69 100644
--- a/drivers/gpu/drm/i915/i915_drm_client.h
+++ b/drivers/gpu/drm/i915/i915_drm_client.h
@@ -14,6 +14,7 @@
#include "i915_file_private.h"
#include "gem/i915_gem_object_types.h"
+#include "gt/intel_context_types.h"
#define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_COMPUTE
@@ -70,6 +71,8 @@ void i915_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file);
void i915_drm_client_add_object(struct i915_drm_client *client,
struct drm_i915_gem_object *obj);
bool i915_drm_client_remove_object(struct drm_i915_gem_object *obj);
+void i915_drm_client_add_context_objects(struct i915_drm_client *client,
+ struct intel_context *ce);
#else
static inline void i915_drm_client_add_object(struct i915_drm_client *client,
struct drm_i915_gem_object *obj)
@@ -79,6 +82,12 @@ static inline void i915_drm_client_add_object(struct i915_drm_client *client,
static inline bool i915_drm_client_remove_object(struct drm_i915_gem_object *obj)
{
}
+
+static inline void
+i915_drm_client_add_context_objects(struct i915_drm_client *client,
+ struct intel_context *ce)
+{
+}
#endif
#endif /* !__I915_DRM_CLIENT_H__ */
--
2.39.2
^ permalink raw reply related [flat|nested] 39+ messages in thread* [PATCH 05/17] drm/i915: Implement fdinfo memory stats printing
2023-07-12 11:45 [RFC v5 00/17] DRM cgroup controller with scheduling control and memory stats Tvrtko Ursulin
` (3 preceding siblings ...)
2023-07-12 11:45 ` [PATCH 04/17] drm/i915: Account ring buffer and context state storage Tvrtko Ursulin
@ 2023-07-12 11:45 ` Tvrtko Ursulin
2023-07-12 11:45 ` [PATCH 06/17] drm: Update file owner during use Tvrtko Ursulin
` (12 subsequent siblings)
17 siblings, 0 replies; 39+ messages in thread
From: Tvrtko Ursulin @ 2023-07-12 11:45 UTC (permalink / raw)
To: Intel-gfx, dri-devel
Cc: Rob Clark, Kenny.Ho, Daniel Vetter, Johannes Weiner, linux-kernel,
Stéphane Marchesin, Christian König, Zefan Li,
Dave Airlie, Tejun Heo, cgroups, T . J . Mercier
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Use the newly added drm_print_memory_stats helper to show memory
utilisation of our objects in drm/driver specific fdinfo output.
To collect the stats we walk the per memory regions object lists
and accumulate object size into the respective drm_memory_stats
categories.
Objects with multiple possible placements are reported in multiple
regions for total and shared sizes, while other categories are
counted only for the currently active region.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Aravind Iddamsetty <aravind.iddamsetty@intel.com>
Cc: Rob Clark <robdclark@gmail.com>
---
drivers/gpu/drm/i915/i915_drm_client.c | 85 ++++++++++++++++++++++++++
1 file changed, 85 insertions(+)
diff --git a/drivers/gpu/drm/i915/i915_drm_client.c b/drivers/gpu/drm/i915/i915_drm_client.c
index a61356012df8..9e7a6075ee25 100644
--- a/drivers/gpu/drm/i915/i915_drm_client.c
+++ b/drivers/gpu/drm/i915/i915_drm_client.c
@@ -45,6 +45,89 @@ void __i915_drm_client_free(struct kref *kref)
}
#ifdef CONFIG_PROC_FS
+static void
+obj_meminfo(struct drm_i915_gem_object *obj,
+ struct drm_memory_stats stats[INTEL_REGION_UNKNOWN])
+{
+ struct intel_memory_region *mr;
+ u64 sz = obj->base.size;
+ enum intel_region_id id;
+ unsigned int i;
+
+ /* Attribute size and shared to all possible memory regions. */
+ for (i = 0; i < obj->mm.n_placements; i++) {
+ mr = obj->mm.placements[i];
+ id = mr->id;
+
+ if (obj->base.handle_count > 1)
+ stats[id].shared += sz;
+ else
+ stats[id].private += sz;
+ }
+
+ /* Attribute other categories to only the current region. */
+ mr = obj->mm.region;
+ if (mr)
+ id = mr->id;
+ else
+ id = INTEL_REGION_SMEM;
+
+ if (!obj->mm.n_placements) {
+ if (obj->base.handle_count > 1)
+ stats[id].shared += sz;
+ else
+ stats[id].private += sz;
+ }
+
+ if (i915_gem_object_has_pages(obj)) {
+ stats[id].resident += sz;
+
+ if (!dma_resv_test_signaled(obj->base.resv,
+ dma_resv_usage_rw(true)))
+ stats[id].active += sz;
+ else if (i915_gem_object_is_shrinkable(obj) &&
+ obj->mm.madv == I915_MADV_DONTNEED)
+ stats[id].purgeable += sz;
+ }
+}
+
+static void show_meminfo(struct drm_printer *p, struct drm_file *file)
+{
+ struct drm_memory_stats stats[INTEL_REGION_UNKNOWN] = {};
+ struct drm_i915_file_private *fpriv = file->driver_priv;
+ struct i915_drm_client *client = fpriv->client;
+ struct drm_i915_private *i915 = fpriv->i915;
+ struct drm_i915_gem_object *obj;
+ struct intel_memory_region *mr;
+ struct list_head *pos;
+ unsigned int id;
+
+ /* Public objects. */
+ spin_lock(&file->table_lock);
+ idr_for_each_entry(&file->object_idr, obj, id)
+ obj_meminfo(obj, stats);
+ spin_unlock(&file->table_lock);
+
+ /* Internal objects. */
+ rcu_read_lock();
+ list_for_each_rcu(pos, &client->objects_list) {
+ obj = i915_gem_object_get_rcu(list_entry(pos, typeof(*obj),
+ client_link));
+ if (!obj)
+ continue;
+ obj_meminfo(obj, stats);
+ i915_gem_object_put(obj);
+ }
+ rcu_read_unlock();
+
+ for_each_memory_region(mr, i915, id)
+ drm_print_memory_stats(p,
+ &stats[id],
+ DRM_GEM_OBJECT_RESIDENT |
+ DRM_GEM_OBJECT_PURGEABLE,
+ mr->name);
+}
+
static const char * const uabi_class_names[] = {
[I915_ENGINE_CLASS_RENDER] = "render",
[I915_ENGINE_CLASS_COPY] = "copy",
@@ -106,6 +189,8 @@ void i915_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file)
* ******************************************************************
*/
+ show_meminfo(p, file);
+
if (GRAPHICS_VER(i915) < 8)
return;
--
2.39.2
^ permalink raw reply related [flat|nested] 39+ messages in thread* [PATCH 06/17] drm: Update file owner during use
2023-07-12 11:45 [RFC v5 00/17] DRM cgroup controller with scheduling control and memory stats Tvrtko Ursulin
` (4 preceding siblings ...)
2023-07-12 11:45 ` [PATCH 05/17] drm/i915: Implement fdinfo memory stats printing Tvrtko Ursulin
@ 2023-07-12 11:45 ` Tvrtko Ursulin
2023-07-12 11:45 ` [PATCH 07/17] cgroup: Add the DRM cgroup controller Tvrtko Ursulin
` (11 subsequent siblings)
17 siblings, 0 replies; 39+ messages in thread
From: Tvrtko Ursulin @ 2023-07-12 11:45 UTC (permalink / raw)
To: Intel-gfx, dri-devel
Cc: Rob Clark, Kenny.Ho, Daniel Vetter, Daniel Vetter,
Johannes Weiner, linux-kernel, Stéphane Marchesin,
Christian König, Zefan Li, Dave Airlie, Tejun Heo, cgroups,
T . J . Mercier
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
With the typical model where the display server opens the file descriptor
and then hands it over to the client(*), we were showing stale data in
debugfs.
Fix it by updating the drm_file->pid on ioctl access from a different
process.
The field is also made RCU protected to allow for lockless readers. Update
side is protected with dev->filelist_mutex.
Before:
$ cat /sys/kernel/debug/dri/0/clients
command pid dev master a uid magic
Xorg 2344 0 y y 0 0
Xorg 2344 0 n y 0 2
Xorg 2344 0 n y 0 3
Xorg 2344 0 n y 0 4
After:
$ cat /sys/kernel/debug/dri/0/clients
command tgid dev master a uid magic
Xorg 830 0 y y 0 0
xfce4-session 880 0 n y 0 1
xfwm4 943 0 n y 0 2
neverball 1095 0 n y 0 3
*)
More detailed and historically accurate description of various handover
implementation kindly provided by Emil Velikov:
"""
The traditional model, the server was the orchestrator managing the
primary device node. From the fd, to the master status and
authentication. But looking at the fd alone, this has varied across
the years.
IIRC in the DRI1 days, Xorg (libdrm really) would have a list of open
fd(s) and reuse those whenever needed, DRI2 the client was responsible
for open() themselves and with DRI3 the fd was passed to the client.
Around the inception of DRI3 and systemd-logind, the latter became
another possible orchestrator. Whereby Xorg and Wayland compositors
could ask it for the fd. For various reasons (hysterical and genuine
ones) Xorg has a fallback path going the open(), whereas Wayland
compositors are moving to solely relying on logind... some never had
fallback even.
Over the past few years, more projects have emerged which provide
functionality similar (be that on API level, Dbus, or otherwise) to
systemd-logind.
"""
v2:
* Fixed typo in commit text and added a fine historical explanation
from Emil.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: "Christian König" <christian.koenig@amd.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Emil Velikov <emil.l.velikov@gmail.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 6 ++--
drivers/gpu/drm/drm_auth.c | 3 +-
drivers/gpu/drm/drm_debugfs.c | 10 ++++---
drivers/gpu/drm/drm_file.c | 40 +++++++++++++++++++++++--
drivers/gpu/drm/drm_ioctl.c | 3 ++
drivers/gpu/drm/nouveau/nouveau_drm.c | 5 +++-
drivers/gpu/drm/vmwgfx/vmwgfx_gem.c | 6 ++--
include/drm/drm_file.h | 13 ++++++--
8 files changed, 71 insertions(+), 15 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 74055cba3dc9..849097dff02b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -963,6 +963,7 @@ static int amdgpu_debugfs_gem_info_show(struct seq_file *m, void *unused)
list_for_each_entry(file, &dev->filelist, lhead) {
struct task_struct *task;
struct drm_gem_object *gobj;
+ struct pid *pid;
int id;
/*
@@ -972,8 +973,9 @@ static int amdgpu_debugfs_gem_info_show(struct seq_file *m, void *unused)
* Therefore, we need to protect this ->comm access using RCU.
*/
rcu_read_lock();
- task = pid_task(file->pid, PIDTYPE_TGID);
- seq_printf(m, "pid %8d command %s:\n", pid_nr(file->pid),
+ pid = rcu_dereference(file->pid);
+ task = pid_task(pid, PIDTYPE_TGID);
+ seq_printf(m, "pid %8d command %s:\n", pid_nr(pid),
task ? task->comm : "<unknown>");
rcu_read_unlock();
diff --git a/drivers/gpu/drm/drm_auth.c b/drivers/gpu/drm/drm_auth.c
index cf92a9ae8034..2ed2585ded37 100644
--- a/drivers/gpu/drm/drm_auth.c
+++ b/drivers/gpu/drm/drm_auth.c
@@ -235,7 +235,8 @@ static int drm_new_set_master(struct drm_device *dev, struct drm_file *fpriv)
static int
drm_master_check_perm(struct drm_device *dev, struct drm_file *file_priv)
{
- if (file_priv->pid == task_pid(current) && file_priv->was_master)
+ if (file_priv->was_master &&
+ rcu_access_pointer(file_priv->pid) == task_pid(current))
return 0;
if (!capable(CAP_SYS_ADMIN))
diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c
index 4855230ba2c6..b46f5ceb24c6 100644
--- a/drivers/gpu/drm/drm_debugfs.c
+++ b/drivers/gpu/drm/drm_debugfs.c
@@ -90,15 +90,17 @@ static int drm_clients_info(struct seq_file *m, void *data)
*/
mutex_lock(&dev->filelist_mutex);
list_for_each_entry_reverse(priv, &dev->filelist, lhead) {
- struct task_struct *task;
bool is_current_master = drm_is_current_master(priv);
+ struct task_struct *task;
+ struct pid *pid;
- rcu_read_lock(); /* locks pid_task()->comm */
- task = pid_task(priv->pid, PIDTYPE_TGID);
+ rcu_read_lock(); /* Locks priv->pid and pid_task()->comm! */
+ pid = rcu_dereference(priv->pid);
+ task = pid_task(pid, PIDTYPE_TGID);
uid = task ? __task_cred(task)->euid : GLOBAL_ROOT_UID;
seq_printf(m, "%20s %5d %3d %c %c %5d %10u\n",
task ? task->comm : "<unknown>",
- pid_vnr(priv->pid),
+ pid_vnr(pid),
priv->minor->index,
is_current_master ? 'y' : 'n',
priv->authenticated ? 'y' : 'n',
diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c
index 883d83bc0e3d..e692770ef6d3 100644
--- a/drivers/gpu/drm/drm_file.c
+++ b/drivers/gpu/drm/drm_file.c
@@ -160,7 +160,7 @@ struct drm_file *drm_file_alloc(struct drm_minor *minor)
/* Get a unique identifier for fdinfo: */
file->client_id = atomic64_inc_return(&ident);
- file->pid = get_pid(task_tgid(current));
+ rcu_assign_pointer(file->pid, get_pid(task_tgid(current)));
file->minor = minor;
/* for compatibility root is always authenticated */
@@ -200,7 +200,7 @@ struct drm_file *drm_file_alloc(struct drm_minor *minor)
drm_syncobj_release(file);
if (drm_core_check_feature(dev, DRIVER_GEM))
drm_gem_release(dev, file);
- put_pid(file->pid);
+ put_pid(rcu_access_pointer(file->pid));
kfree(file);
return ERR_PTR(ret);
@@ -291,7 +291,7 @@ void drm_file_free(struct drm_file *file)
WARN_ON(!list_empty(&file->event_list));
- put_pid(file->pid);
+ put_pid(rcu_access_pointer(file->pid));
kfree(file);
}
@@ -505,6 +505,40 @@ int drm_release(struct inode *inode, struct file *filp)
}
EXPORT_SYMBOL(drm_release);
+void drm_file_update_pid(struct drm_file *filp)
+{
+ struct drm_device *dev;
+ struct pid *pid, *old;
+
+ /*
+ * Master nodes need to keep the original ownership in order for
+ * drm_master_check_perm to keep working correctly. (See comment in
+ * drm_auth.c.)
+ */
+ if (filp->was_master)
+ return;
+
+ pid = task_tgid(current);
+
+ /*
+ * Quick unlocked check since the model is a single handover followed by
+ * exclusive repeated use.
+ */
+ if (pid == rcu_access_pointer(filp->pid))
+ return;
+
+ dev = filp->minor->dev;
+ mutex_lock(&dev->filelist_mutex);
+ old = rcu_replace_pointer(filp->pid, pid, 1);
+ mutex_unlock(&dev->filelist_mutex);
+
+ if (pid != old) {
+ get_pid(pid);
+ synchronize_rcu();
+ put_pid(old);
+ }
+}
+
/**
* drm_release_noglobal - release method for DRM file
* @inode: device inode
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 8e9afe7af19c..78f9387e79fe 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -774,6 +774,9 @@ long drm_ioctl_kernel(struct file *file, drm_ioctl_t *func, void *kdata,
struct drm_device *dev = file_priv->minor->dev;
int retcode;
+ /* Update drm_file owner if fd was passed along. */
+ drm_file_update_pid(file_priv);
+
if (drm_dev_is_unplugged(dev))
return -ENODEV;
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index ca3bb8075357..cd15759815a8 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -1101,7 +1101,10 @@ nouveau_drm_open(struct drm_device *dev, struct drm_file *fpriv)
}
get_task_comm(tmpname, current);
- snprintf(name, sizeof(name), "%s[%d]", tmpname, pid_nr(fpriv->pid));
+ rcu_read_lock();
+ snprintf(name, sizeof(name), "%s[%d]",
+ tmpname, pid_nr(rcu_dereference(fpriv->pid)));
+ rcu_read_unlock();
if (!(cli = kzalloc(sizeof(*cli), GFP_KERNEL))) {
ret = -ENOMEM;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c
index c0da89e16e6f..a07e5b7e2f2f 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c
@@ -232,6 +232,7 @@ static int vmw_debugfs_gem_info_show(struct seq_file *m, void *unused)
list_for_each_entry(file, &dev->filelist, lhead) {
struct task_struct *task;
struct drm_gem_object *gobj;
+ struct pid *pid;
int id;
/*
@@ -241,8 +242,9 @@ static int vmw_debugfs_gem_info_show(struct seq_file *m, void *unused)
* Therefore, we need to protect this ->comm access using RCU.
*/
rcu_read_lock();
- task = pid_task(file->pid, PIDTYPE_TGID);
- seq_printf(m, "pid %8d command %s:\n", pid_nr(file->pid),
+ pid = rcu_dereference(file->pid);
+ task = pid_task(pid, PIDTYPE_TGID);
+ seq_printf(m, "pid %8d command %s:\n", pid_nr(pid),
task ? task->comm : "<unknown>");
rcu_read_unlock();
diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h
index 966912053cb0..c76249d5467e 100644
--- a/include/drm/drm_file.h
+++ b/include/drm/drm_file.h
@@ -256,8 +256,15 @@ struct drm_file {
/** @master_lookup_lock: Serializes @master. */
spinlock_t master_lookup_lock;
- /** @pid: Process that opened this file. */
- struct pid *pid;
+ /**
+ * @pid: Process that is using this file.
+ *
+ * Must only be dereferenced under a rcu_read_lock or equivalent.
+ *
+ * Updates are guarded with dev->filelist_mutex and reference must be
+ * dropped after a RCU grace period to accommodate lockless readers.
+ */
+ struct pid __rcu *pid;
/** @client_id: A unique id for fdinfo */
u64 client_id;
@@ -420,6 +427,8 @@ static inline bool drm_is_accel_client(const struct drm_file *file_priv)
return file_priv->minor->type == DRM_MINOR_ACCEL;
}
+void drm_file_update_pid(struct drm_file *);
+
int drm_open(struct inode *inode, struct file *filp);
int drm_open_helper(struct file *filp, struct drm_minor *minor);
ssize_t drm_read(struct file *filp, char __user *buffer,
--
2.39.2
^ permalink raw reply related [flat|nested] 39+ messages in thread* [PATCH 07/17] cgroup: Add the DRM cgroup controller
2023-07-12 11:45 [RFC v5 00/17] DRM cgroup controller with scheduling control and memory stats Tvrtko Ursulin
` (5 preceding siblings ...)
2023-07-12 11:45 ` [PATCH 06/17] drm: Update file owner during use Tvrtko Ursulin
@ 2023-07-12 11:45 ` Tvrtko Ursulin
2023-07-12 11:45 ` [PATCH 08/17] drm/cgroup: Track DRM clients per cgroup Tvrtko Ursulin
` (10 subsequent siblings)
17 siblings, 0 replies; 39+ messages in thread
From: Tvrtko Ursulin @ 2023-07-12 11:45 UTC (permalink / raw)
To: Intel-gfx, dri-devel
Cc: Rob Clark, Kenny.Ho, Daniel Vetter, Johannes Weiner, linux-kernel,
Stéphane Marchesin, Christian König, Zefan Li,
Dave Airlie, Tejun Heo, cgroups, T . J . Mercier
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Skeleton controller without any functionality.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
include/linux/cgroup_drm.h | 9 ++++++
include/linux/cgroup_subsys.h | 4 +++
init/Kconfig | 7 ++++
kernel/cgroup/Makefile | 1 +
kernel/cgroup/drm.c | 60 +++++++++++++++++++++++++++++++++++
5 files changed, 81 insertions(+)
create mode 100644 include/linux/cgroup_drm.h
create mode 100644 kernel/cgroup/drm.c
diff --git a/include/linux/cgroup_drm.h b/include/linux/cgroup_drm.h
new file mode 100644
index 000000000000..8ef66a47619f
--- /dev/null
+++ b/include/linux/cgroup_drm.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _CGROUP_DRM_H
+#define _CGROUP_DRM_H
+
+#endif /* _CGROUP_DRM_H */
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index 445235487230..49460494a010 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -65,6 +65,10 @@ SUBSYS(rdma)
SUBSYS(misc)
#endif
+#if IS_ENABLED(CONFIG_CGROUP_DRM)
+SUBSYS(drm)
+#endif
+
/*
* The following subsystems are not supported on the default hierarchy.
*/
diff --git a/init/Kconfig b/init/Kconfig
index f7f65af4ee12..485cff856c0b 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1065,6 +1065,13 @@ config CGROUP_RDMA
Attaching processes with active RDMA resources to the cgroup
hierarchy is allowed even if can cross the hierarchy's limit.
+config CGROUP_DRM
+ bool "DRM controller"
+ help
+ Provides the DRM subsystem controller.
+
+ ...
+
config CGROUP_FREEZER
bool "Freezer controller"
help
diff --git a/kernel/cgroup/Makefile b/kernel/cgroup/Makefile
index 12f8457ad1f9..849bd2917477 100644
--- a/kernel/cgroup/Makefile
+++ b/kernel/cgroup/Makefile
@@ -6,4 +6,5 @@ obj-$(CONFIG_CGROUP_PIDS) += pids.o
obj-$(CONFIG_CGROUP_RDMA) += rdma.o
obj-$(CONFIG_CPUSETS) += cpuset.o
obj-$(CONFIG_CGROUP_MISC) += misc.o
+obj-$(CONFIG_CGROUP_DRM) += drm.o
obj-$(CONFIG_CGROUP_DEBUG) += debug.o
diff --git a/kernel/cgroup/drm.c b/kernel/cgroup/drm.c
new file mode 100644
index 000000000000..02c8eaa633d3
--- /dev/null
+++ b/kernel/cgroup/drm.c
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <linux/cgroup.h>
+#include <linux/cgroup_drm.h>
+#include <linux/slab.h>
+
+struct drm_cgroup_state {
+ struct cgroup_subsys_state css;
+};
+
+struct drm_root_cgroup_state {
+ struct drm_cgroup_state drmcs;
+};
+
+static struct drm_root_cgroup_state root_drmcs;
+
+static inline struct drm_cgroup_state *
+css_to_drmcs(struct cgroup_subsys_state *css)
+{
+ return container_of(css, struct drm_cgroup_state, css);
+}
+
+static void drmcs_free(struct cgroup_subsys_state *css)
+{
+ struct drm_cgroup_state *drmcs = css_to_drmcs(css);
+
+ if (drmcs != &root_drmcs.drmcs)
+ kfree(drmcs);
+}
+
+static struct cgroup_subsys_state *
+drmcs_alloc(struct cgroup_subsys_state *parent_css)
+{
+ struct drm_cgroup_state *drmcs;
+
+ if (!parent_css) {
+ drmcs = &root_drmcs.drmcs;
+ } else {
+ drmcs = kzalloc(sizeof(*drmcs), GFP_KERNEL);
+ if (!drmcs)
+ return ERR_PTR(-ENOMEM);
+ }
+
+ return &drmcs->css;
+}
+
+struct cftype files[] = {
+ { } /* Zero entry terminates. */
+};
+
+struct cgroup_subsys drm_cgrp_subsys = {
+ .css_alloc = drmcs_alloc,
+ .css_free = drmcs_free,
+ .early_init = false,
+ .legacy_cftypes = files,
+ .dfl_cftypes = files,
+};
--
2.39.2
^ permalink raw reply related [flat|nested] 39+ messages in thread* [PATCH 08/17] drm/cgroup: Track DRM clients per cgroup
2023-07-12 11:45 [RFC v5 00/17] DRM cgroup controller with scheduling control and memory stats Tvrtko Ursulin
` (6 preceding siblings ...)
2023-07-12 11:45 ` [PATCH 07/17] cgroup: Add the DRM cgroup controller Tvrtko Ursulin
@ 2023-07-12 11:45 ` Tvrtko Ursulin
2023-07-21 22:14 ` Tejun Heo
2023-07-12 11:45 ` [PATCH 09/17] drm/cgroup: Add ability to query drm cgroup GPU time Tvrtko Ursulin
` (9 subsequent siblings)
17 siblings, 1 reply; 39+ messages in thread
From: Tvrtko Ursulin @ 2023-07-12 11:45 UTC (permalink / raw)
To: Intel-gfx, dri-devel
Cc: Rob Clark, Kenny.Ho, Daniel Vetter, Johannes Weiner, linux-kernel,
Stéphane Marchesin, Christian König, Zefan Li,
Dave Airlie, Tejun Heo, cgroups, T . J . Mercier
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
To enable propagation of settings from the cgroup DRM controller to DRM
and vice-versa, we need to start tracking to which cgroups DRM clients
belong.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
drivers/gpu/drm/drm_file.c | 6 ++++
include/drm/drm_file.h | 6 ++++
include/linux/cgroup_drm.h | 20 ++++++++++++
kernel/cgroup/drm.c | 62 +++++++++++++++++++++++++++++++++++++-
4 files changed, 93 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c
index e692770ef6d3..794ffb487472 100644
--- a/drivers/gpu/drm/drm_file.c
+++ b/drivers/gpu/drm/drm_file.c
@@ -32,6 +32,7 @@
*/
#include <linux/anon_inodes.h>
+#include <linux/cgroup_drm.h>
#include <linux/dma-fence.h>
#include <linux/file.h>
#include <linux/module.h>
@@ -304,6 +305,8 @@ static void drm_close_helper(struct file *filp)
list_del(&file_priv->lhead);
mutex_unlock(&dev->filelist_mutex);
+ drmcgroup_client_close(file_priv);
+
drm_file_free(file_priv);
}
@@ -367,6 +370,8 @@ int drm_open_helper(struct file *filp, struct drm_minor *minor)
list_add(&priv->lhead, &dev->filelist);
mutex_unlock(&dev->filelist_mutex);
+ drmcgroup_client_open(priv);
+
#ifdef CONFIG_DRM_LEGACY
#ifdef __alpha__
/*
@@ -533,6 +538,7 @@ void drm_file_update_pid(struct drm_file *filp)
mutex_unlock(&dev->filelist_mutex);
if (pid != old) {
+ drmcgroup_client_migrate(filp);
get_pid(pid);
synchronize_rcu();
put_pid(old);
diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h
index c76249d5467e..e8fb3a39d482 100644
--- a/include/drm/drm_file.h
+++ b/include/drm/drm_file.h
@@ -30,6 +30,7 @@
#ifndef _DRM_FILE_H_
#define _DRM_FILE_H_
+#include <linux/cgroup.h>
#include <linux/types.h>
#include <linux/completion.h>
#include <linux/idr.h>
@@ -283,6 +284,11 @@ struct drm_file {
/** @minor: &struct drm_minor for this file. */
struct drm_minor *minor;
+#if IS_ENABLED(CONFIG_CGROUP_DRM)
+ struct cgroup_subsys_state *__css;
+ struct list_head clink;
+#endif
+
/**
* @object_idr:
*
diff --git a/include/linux/cgroup_drm.h b/include/linux/cgroup_drm.h
index 8ef66a47619f..176431842d8e 100644
--- a/include/linux/cgroup_drm.h
+++ b/include/linux/cgroup_drm.h
@@ -6,4 +6,24 @@
#ifndef _CGROUP_DRM_H
#define _CGROUP_DRM_H
+#include <drm/drm_file.h>
+
+#if IS_ENABLED(CONFIG_CGROUP_DRM)
+void drmcgroup_client_open(struct drm_file *file_priv);
+void drmcgroup_client_close(struct drm_file *file_priv);
+void drmcgroup_client_migrate(struct drm_file *file_priv);
+#else
+static inline void drmcgroup_client_open(struct drm_file *file_priv)
+{
+}
+
+static inline void drmcgroup_client_close(struct drm_file *file_priv)
+{
+}
+
+static inline void drmcgroup_client_migrate(struct drm_file *file_priv)
+{
+}
+#endif
+
#endif /* _CGROUP_DRM_H */
diff --git a/kernel/cgroup/drm.c b/kernel/cgroup/drm.c
index 02c8eaa633d3..d702be1b441f 100644
--- a/kernel/cgroup/drm.c
+++ b/kernel/cgroup/drm.c
@@ -5,17 +5,25 @@
#include <linux/cgroup.h>
#include <linux/cgroup_drm.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
#include <linux/slab.h>
struct drm_cgroup_state {
struct cgroup_subsys_state css;
+
+ struct list_head clients;
};
struct drm_root_cgroup_state {
struct drm_cgroup_state drmcs;
};
-static struct drm_root_cgroup_state root_drmcs;
+static struct drm_root_cgroup_state root_drmcs = {
+ .drmcs.clients = LIST_HEAD_INIT(root_drmcs.drmcs.clients),
+};
+
+static DEFINE_MUTEX(drmcg_mutex);
static inline struct drm_cgroup_state *
css_to_drmcs(struct cgroup_subsys_state *css)
@@ -42,11 +50,63 @@ drmcs_alloc(struct cgroup_subsys_state *parent_css)
drmcs = kzalloc(sizeof(*drmcs), GFP_KERNEL);
if (!drmcs)
return ERR_PTR(-ENOMEM);
+
+ INIT_LIST_HEAD(&drmcs->clients);
}
return &drmcs->css;
}
+void drmcgroup_client_open(struct drm_file *file_priv)
+{
+ struct drm_cgroup_state *drmcs;
+
+ drmcs = css_to_drmcs(task_get_css(current, drm_cgrp_id));
+
+ mutex_lock(&drmcg_mutex);
+ file_priv->__css = &drmcs->css; /* Keeps the reference. */
+ list_add_tail(&file_priv->clink, &drmcs->clients);
+ mutex_unlock(&drmcg_mutex);
+}
+EXPORT_SYMBOL_GPL(drmcgroup_client_open);
+
+void drmcgroup_client_close(struct drm_file *file_priv)
+{
+ struct drm_cgroup_state *drmcs;
+
+ drmcs = css_to_drmcs(file_priv->__css);
+
+ mutex_lock(&drmcg_mutex);
+ list_del(&file_priv->clink);
+ file_priv->__css = NULL;
+ mutex_unlock(&drmcg_mutex);
+
+ css_put(&drmcs->css);
+}
+EXPORT_SYMBOL_GPL(drmcgroup_client_close);
+
+void drmcgroup_client_migrate(struct drm_file *file_priv)
+{
+ struct drm_cgroup_state *src, *dst;
+ struct cgroup_subsys_state *old;
+
+ mutex_lock(&drmcg_mutex);
+
+ old = file_priv->__css;
+ src = css_to_drmcs(old);
+ dst = css_to_drmcs(task_get_css(current, drm_cgrp_id));
+
+ if (src != dst) {
+ file_priv->__css = &dst->css; /* Keeps the reference. */
+ list_move_tail(&file_priv->clink, &dst->clients);
+ }
+
+ mutex_unlock(&drmcg_mutex);
+
+ css_put(old);
+}
+EXPORT_SYMBOL_GPL(drmcgroup_client_migrate);
+
struct cftype files[] = {
{ } /* Zero entry terminates. */
};
--
2.39.2
^ permalink raw reply related [flat|nested] 39+ messages in thread* Re: [PATCH 08/17] drm/cgroup: Track DRM clients per cgroup
2023-07-12 11:45 ` [PATCH 08/17] drm/cgroup: Track DRM clients per cgroup Tvrtko Ursulin
@ 2023-07-21 22:14 ` Tejun Heo
0 siblings, 0 replies; 39+ messages in thread
From: Tejun Heo @ 2023-07-21 22:14 UTC (permalink / raw)
To: Tvrtko Ursulin
Cc: Rob Clark, Kenny.Ho, Dave Airlie, Stéphane Marchesin,
Daniel Vetter, Intel-gfx, linux-kernel, dri-devel,
Christian König, Zefan Li, Johannes Weiner, cgroups,
T . J . Mercier
Hello,
On Wed, Jul 12, 2023 at 12:45:56PM +0100, Tvrtko Ursulin wrote:
> +void drmcgroup_client_migrate(struct drm_file *file_priv)
> +{
> + struct drm_cgroup_state *src, *dst;
> + struct cgroup_subsys_state *old;
> +
> + mutex_lock(&drmcg_mutex);
> +
> + old = file_priv->__css;
> + src = css_to_drmcs(old);
> + dst = css_to_drmcs(task_get_css(current, drm_cgrp_id));
> +
> + if (src != dst) {
> + file_priv->__css = &dst->css; /* Keeps the reference. */
> + list_move_tail(&file_priv->clink, &dst->clients);
> + }
> +
> + mutex_unlock(&drmcg_mutex);
> +
> + css_put(old);
> +}
> +EXPORT_SYMBOL_GPL(drmcgroup_client_migrate);
So, you're implicitly migrating the fd to the latest ioctl user on the first
access. While this may work for state-less control like usage time. This
likely will cause problem if you later want to add stateful control for
memory consumption. e.g. What happens if the new destination doesn't have
enough budget to accommodate the fd's usage? Let's say we allow over-commit
with follow-up reclaim or oom kill actions, if so, can we guarantee that all
memory ownership for can always be updated? If not, what happens after
failure?
If DRM needs to transfer fd ownership with resources attached to it, it
likely would be a good idea to make that an explicit operation so that the
attempt can be verified and failed if necessary.
Thanks.
--
tejun
^ permalink raw reply [flat|nested] 39+ messages in thread
* [PATCH 09/17] drm/cgroup: Add ability to query drm cgroup GPU time
2023-07-12 11:45 [RFC v5 00/17] DRM cgroup controller with scheduling control and memory stats Tvrtko Ursulin
` (7 preceding siblings ...)
2023-07-12 11:45 ` [PATCH 08/17] drm/cgroup: Track DRM clients per cgroup Tvrtko Ursulin
@ 2023-07-12 11:45 ` Tvrtko Ursulin
2023-07-12 11:45 ` [PATCH 10/17] drm/cgroup: Add over budget signalling callback Tvrtko Ursulin
` (8 subsequent siblings)
17 siblings, 0 replies; 39+ messages in thread
From: Tvrtko Ursulin @ 2023-07-12 11:45 UTC (permalink / raw)
To: Intel-gfx, dri-devel
Cc: Rob Clark, Kenny.Ho, Daniel Vetter, Johannes Weiner, linux-kernel,
Stéphane Marchesin, Christian König, Zefan Li,
Dave Airlie, Tejun Heo, cgroups, T . J . Mercier
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Add a driver callback and core helper which allow querying the time spent
on GPUs for processes belonging to a group.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
include/drm/drm_drv.h | 28 ++++++++++++++++++++++++++++
kernel/cgroup/drm.c | 20 ++++++++++++++++++++
2 files changed, 48 insertions(+)
diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h
index b77f2c7275b7..3116fa4dbc48 100644
--- a/include/drm/drm_drv.h
+++ b/include/drm/drm_drv.h
@@ -151,6 +151,24 @@ enum drm_driver_feature {
DRIVER_HAVE_IRQ = BIT(30),
};
+/**
+ * struct drm_cgroup_ops
+ *
+ * This structure contains a number of callbacks that drivers can provide if
+ * they are able to support one or more of the functionalities implemented by
+ * the DRM cgroup controller.
+ */
+struct drm_cgroup_ops {
+ /**
+ * @active_time_us:
+ *
+ * Optional callback for reporting the GPU time consumed by this client.
+ *
+ * Used by the DRM core when queried by the DRM cgroup controller.
+ */
+ u64 (*active_time_us) (struct drm_file *);
+};
+
/**
* struct drm_driver - DRM driver structure
*
@@ -428,6 +446,16 @@ struct drm_driver {
*/
const struct file_operations *fops;
+#ifdef CONFIG_CGROUP_DRM
+ /**
+ * @cg_ops:
+ *
+ * Optional pointer to driver callbacks facilitating integration with
+ * the DRM cgroup controller.
+ */
+ const struct drm_cgroup_ops *cg_ops;
+#endif
+
#ifdef CONFIG_DRM_LEGACY
/* Everything below here is for legacy driver, never use! */
/* private: */
diff --git a/kernel/cgroup/drm.c b/kernel/cgroup/drm.c
index d702be1b441f..acdb76635b60 100644
--- a/kernel/cgroup/drm.c
+++ b/kernel/cgroup/drm.c
@@ -9,6 +9,8 @@
#include <linux/mutex.h>
#include <linux/slab.h>
+#include <drm/drm_drv.h>
+
struct drm_cgroup_state {
struct cgroup_subsys_state css;
@@ -31,6 +33,24 @@ css_to_drmcs(struct cgroup_subsys_state *css)
return container_of(css, struct drm_cgroup_state, css);
}
+static u64 drmcs_get_active_time_us(struct drm_cgroup_state *drmcs)
+{
+ struct drm_file *fpriv;
+ u64 total = 0;
+
+ lockdep_assert_held(&drmcg_mutex);
+
+ list_for_each_entry(fpriv, &drmcs->clients, clink) {
+ const struct drm_cgroup_ops *cg_ops =
+ fpriv->minor->dev->driver->cg_ops;
+
+ if (cg_ops && cg_ops->active_time_us)
+ total += cg_ops->active_time_us(fpriv);
+ }
+
+ return total;
+}
+
static void drmcs_free(struct cgroup_subsys_state *css)
{
struct drm_cgroup_state *drmcs = css_to_drmcs(css);
--
2.39.2
^ permalink raw reply related [flat|nested] 39+ messages in thread* [PATCH 10/17] drm/cgroup: Add over budget signalling callback
2023-07-12 11:45 [RFC v5 00/17] DRM cgroup controller with scheduling control and memory stats Tvrtko Ursulin
` (8 preceding siblings ...)
2023-07-12 11:45 ` [PATCH 09/17] drm/cgroup: Add ability to query drm cgroup GPU time Tvrtko Ursulin
@ 2023-07-12 11:45 ` Tvrtko Ursulin
2023-07-12 11:45 ` [PATCH 11/17] drm/cgroup: Only track clients which are providing drm_cgroup_ops Tvrtko Ursulin
` (7 subsequent siblings)
17 siblings, 0 replies; 39+ messages in thread
From: Tvrtko Ursulin @ 2023-07-12 11:45 UTC (permalink / raw)
To: Intel-gfx, dri-devel
Cc: Rob Clark, Kenny.Ho, Daniel Vetter, Johannes Weiner, linux-kernel,
Stéphane Marchesin, Christian König, Zefan Li,
Dave Airlie, Tejun Heo, cgroups, T . J . Mercier
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Add a new callback via which the drm cgroup controller is notifying the
drm core that a certain process is above its allotted GPU time.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
include/drm/drm_drv.h | 8 ++++++++
kernel/cgroup/drm.c | 16 ++++++++++++++++
2 files changed, 24 insertions(+)
diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h
index 3116fa4dbc48..29e11a87bf75 100644
--- a/include/drm/drm_drv.h
+++ b/include/drm/drm_drv.h
@@ -167,6 +167,14 @@ struct drm_cgroup_ops {
* Used by the DRM core when queried by the DRM cgroup controller.
*/
u64 (*active_time_us) (struct drm_file *);
+
+ /**
+ * @signal_budget:
+ *
+ * Optional callback used by the DRM core to forward over/under GPU time
+ * messages sent by the DRM cgroup controller.
+ */
+ int (*signal_budget) (struct drm_file *, u64 used, u64 budget);
};
/**
diff --git a/kernel/cgroup/drm.c b/kernel/cgroup/drm.c
index acdb76635b60..68f31797c4f0 100644
--- a/kernel/cgroup/drm.c
+++ b/kernel/cgroup/drm.c
@@ -51,6 +51,22 @@ static u64 drmcs_get_active_time_us(struct drm_cgroup_state *drmcs)
return total;
}
+static void
+drmcs_signal_budget(struct drm_cgroup_state *drmcs, u64 usage, u64 budget)
+{
+ struct drm_file *fpriv;
+
+ lockdep_assert_held(&drmcg_mutex);
+
+ list_for_each_entry(fpriv, &drmcs->clients, clink) {
+ const struct drm_cgroup_ops *cg_ops =
+ fpriv->minor->dev->driver->cg_ops;
+
+ if (cg_ops && cg_ops->signal_budget)
+ cg_ops->signal_budget(fpriv, usage, budget);
+ }
+}
+
static void drmcs_free(struct cgroup_subsys_state *css)
{
struct drm_cgroup_state *drmcs = css_to_drmcs(css);
--
2.39.2
^ permalink raw reply related [flat|nested] 39+ messages in thread* [PATCH 11/17] drm/cgroup: Only track clients which are providing drm_cgroup_ops
2023-07-12 11:45 [RFC v5 00/17] DRM cgroup controller with scheduling control and memory stats Tvrtko Ursulin
` (9 preceding siblings ...)
2023-07-12 11:45 ` [PATCH 10/17] drm/cgroup: Add over budget signalling callback Tvrtko Ursulin
@ 2023-07-12 11:45 ` Tvrtko Ursulin
2023-07-12 11:46 ` [PATCH 12/17] cgroup/drm: Introduce weight based drm cgroup control Tvrtko Ursulin
` (6 subsequent siblings)
17 siblings, 0 replies; 39+ messages in thread
From: Tvrtko Ursulin @ 2023-07-12 11:45 UTC (permalink / raw)
To: Intel-gfx, dri-devel
Cc: Rob Clark, Kenny.Ho, Daniel Vetter, Johannes Weiner, linux-kernel,
Stéphane Marchesin, Christian König, Zefan Li,
Dave Airlie, Tejun Heo, cgroups, T . J . Mercier
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
To reduce the number of tracking going on, especially with drivers which
will not support any sort of control from the drm cgroup controller side,
let's express the functionality as opt-in and use the presence of
drm_cgroup_ops as activation criteria.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
kernel/cgroup/drm.c | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/kernel/cgroup/drm.c b/kernel/cgroup/drm.c
index 68f31797c4f0..60e1f3861576 100644
--- a/kernel/cgroup/drm.c
+++ b/kernel/cgroup/drm.c
@@ -97,6 +97,9 @@ void drmcgroup_client_open(struct drm_file *file_priv)
{
struct drm_cgroup_state *drmcs;
+ if (!file_priv->minor->dev->driver->cg_ops)
+ return;
+
drmcs = css_to_drmcs(task_get_css(current, drm_cgrp_id));
mutex_lock(&drmcg_mutex);
@@ -112,6 +115,9 @@ void drmcgroup_client_close(struct drm_file *file_priv)
drmcs = css_to_drmcs(file_priv->__css);
+ if (!file_priv->minor->dev->driver->cg_ops)
+ return;
+
mutex_lock(&drmcg_mutex);
list_del(&file_priv->clink);
file_priv->__css = NULL;
@@ -126,6 +132,9 @@ void drmcgroup_client_migrate(struct drm_file *file_priv)
struct drm_cgroup_state *src, *dst;
struct cgroup_subsys_state *old;
+ if (!file_priv->minor->dev->driver->cg_ops)
+ return;
+
mutex_lock(&drmcg_mutex);
old = file_priv->__css;
--
2.39.2
^ permalink raw reply related [flat|nested] 39+ messages in thread* [PATCH 12/17] cgroup/drm: Introduce weight based drm cgroup control
2023-07-12 11:45 [RFC v5 00/17] DRM cgroup controller with scheduling control and memory stats Tvrtko Ursulin
` (10 preceding siblings ...)
2023-07-12 11:45 ` [PATCH 11/17] drm/cgroup: Only track clients which are providing drm_cgroup_ops Tvrtko Ursulin
@ 2023-07-12 11:46 ` Tvrtko Ursulin
2023-07-21 22:17 ` Tejun Heo
2023-07-12 11:46 ` [PATCH 13/17] drm/i915: Wire up with drm controller GPU time query Tvrtko Ursulin
` (5 subsequent siblings)
17 siblings, 1 reply; 39+ messages in thread
From: Tvrtko Ursulin @ 2023-07-12 11:46 UTC (permalink / raw)
To: Intel-gfx, dri-devel
Cc: Rob Clark, Kenny.Ho, Daniel Vetter, Johannes Weiner, linux-kernel,
Stéphane Marchesin, Christian König, Michal Koutný,
Zefan Li, Dave Airlie, Tejun Heo, cgroups, T . J . Mercier
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Similar to CPU scheduling, implement a concept of weight in the drm cgroup
controller.
Uses the same range and default as the CPU controller - CGROUP_WEIGHT_MIN,
CGROUP_WEIGHT_DFL and CGROUP_WEIGHT_MAX.
Later each cgroup is assigned a time budget proportionally based on the
relative weights of its siblings. This time budget is in turn split by
the group's children and so on.
This will be used to implement a soft, or best effort signal from drm
cgroup to drm core notifying about groups which are over their allotted
budget.
No guarantees that the limit can be enforced are provided or implied.
Checking of GPU usage is done periodically by the controller which can be
configured via drmcg_period_ms kernel boot parameter and which defaults
to 2s.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Michal Koutný <mkoutny@suse.com>
Cc: Tejun Heo <tj@kernel.org>
---
Documentation/admin-guide/cgroup-v2.rst | 31 ++
kernel/cgroup/drm.c | 409 +++++++++++++++++++++++-
2 files changed, 437 insertions(+), 3 deletions(-)
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 4ef890191196..da350858c59f 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -2418,6 +2418,37 @@ HugeTLB Interface Files
hugetlb pages of <hugepagesize> in this cgroup. Only active in
use hugetlb pages are included. The per-node values are in bytes.
+DRM
+---
+
+The DRM controller allows configuring scheduling soft limits.
+
+DRM scheduling soft limits
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Because of the heterogeneous hardware and driver DRM capabilities, soft limits
+are implemented as a loose co-operative (bi-directional) interface between the
+controller and DRM core.
+
+The controller configures the GPU time allowed per group and periodically scans
+the belonging tasks to detect the over budget condition, at which point it
+invokes a callback notifying the DRM core of the condition.
+
+DRM core provides an API to query per process GPU utilization and 2nd API to
+receive notification from the cgroup controller when the group enters or exits
+the over budget condition.
+
+Individual DRM drivers which implement the interface are expected to act on this
+in the best-effort manner only. There are no guarantees that the soft limits
+will be respected.
+
+DRM scheduling soft limits interface files
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ drm.weight
+ Standard cgroup weight based control [1, 10000] used to configure the
+ relative distributing of GPU time between the sibling groups.
+
Misc
----
diff --git a/kernel/cgroup/drm.c b/kernel/cgroup/drm.c
index 60e1f3861576..b244e3d828cc 100644
--- a/kernel/cgroup/drm.c
+++ b/kernel/cgroup/drm.c
@@ -6,7 +6,9 @@
#include <linux/cgroup.h>
#include <linux/cgroup_drm.h>
#include <linux/list.h>
+#include <linux/moduleparam.h>
#include <linux/mutex.h>
+#include <linux/signal.h>
#include <linux/slab.h>
#include <drm/drm_drv.h>
@@ -15,10 +17,28 @@ struct drm_cgroup_state {
struct cgroup_subsys_state css;
struct list_head clients;
+
+ unsigned int weight;
+
+ unsigned int sum_children_weights;
+
+ bool over;
+ bool over_budget;
+
+ u64 per_s_budget_us;
+ u64 prev_active_us;
+ u64 active_us;
};
struct drm_root_cgroup_state {
struct drm_cgroup_state drmcs;
+
+ unsigned int period_us;
+
+ unsigned int last_scan_duration_us;
+ ktime_t prev_timestamp;
+
+ struct delayed_work scan_work;
};
static struct drm_root_cgroup_state root_drmcs = {
@@ -27,6 +47,9 @@ static struct drm_root_cgroup_state root_drmcs = {
static DEFINE_MUTEX(drmcg_mutex);
+static int drmcg_period_ms = 2000;
+module_param(drmcg_period_ms, int, 0644);
+
static inline struct drm_cgroup_state *
css_to_drmcs(struct cgroup_subsys_state *css)
{
@@ -67,12 +90,263 @@ drmcs_signal_budget(struct drm_cgroup_state *drmcs, u64 usage, u64 budget)
}
}
+static u64
+drmcs_read_weight(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+ struct drm_cgroup_state *drmcs = css_to_drmcs(css);
+
+ return drmcs->weight;
+}
+
+static int
+drmcs_write_weight(struct cgroup_subsys_state *css, struct cftype *cftype,
+ u64 weight)
+{
+ struct drm_cgroup_state *drmcs = css_to_drmcs(css);
+ int ret;
+
+ if (weight < CGROUP_WEIGHT_MIN || weight > CGROUP_WEIGHT_MAX)
+ return -ERANGE;
+
+ ret = mutex_lock_interruptible(&drmcg_mutex);
+ if (ret)
+ return ret;
+ drmcs->weight = weight;
+ mutex_unlock(&drmcg_mutex);
+
+ return 0;
+}
+
+static bool __start_scanning(unsigned int period_us)
+{
+ struct drm_cgroup_state *root = &root_drmcs.drmcs;
+ struct cgroup_subsys_state *node;
+ ktime_t start, now;
+ bool ok = false;
+
+ lockdep_assert_held(&drmcg_mutex);
+
+ start = ktime_get();
+ if (period_us > root_drmcs.last_scan_duration_us)
+ period_us -= root_drmcs.last_scan_duration_us;
+
+ rcu_read_lock();
+
+ css_for_each_descendant_post(node, &root->css) {
+ struct drm_cgroup_state *drmcs = css_to_drmcs(node);
+
+ if (!css_tryget_online(node))
+ goto out;
+
+ drmcs->active_us = 0;
+ drmcs->sum_children_weights = 0;
+
+ if (period_us && node == &root->css)
+ drmcs->per_s_budget_us =
+ DIV_ROUND_UP_ULL((u64)period_us * USEC_PER_SEC,
+ USEC_PER_SEC);
+ else
+ drmcs->per_s_budget_us = 0;
+
+ css_put(node);
+ }
+
+ css_for_each_descendant_post(node, &root->css) {
+ struct drm_cgroup_state *drmcs = css_to_drmcs(node);
+ struct drm_cgroup_state *parent;
+ u64 active;
+
+ if (!css_tryget_online(node))
+ goto out;
+ if (!node->parent) {
+ css_put(node);
+ continue;
+ }
+ if (!css_tryget_online(node->parent)) {
+ css_put(node);
+ goto out;
+ }
+ parent = css_to_drmcs(node->parent);
+
+ active = drmcs_get_active_time_us(drmcs);
+ if (period_us && active > drmcs->prev_active_us)
+ drmcs->active_us += active - drmcs->prev_active_us;
+ drmcs->prev_active_us = active;
+
+ parent->active_us += drmcs->active_us;
+ parent->sum_children_weights += drmcs->weight;
+
+ css_put(node);
+ css_put(&parent->css);
+ }
+
+ ok = true;
+ now = ktime_get();
+ root_drmcs.last_scan_duration_us = ktime_to_us(ktime_sub(now, start));
+ root_drmcs.prev_timestamp = now;
+
+out:
+ rcu_read_unlock();
+
+ return ok;
+}
+
+static void scan_worker(struct work_struct *work)
+{
+ struct drm_cgroup_state *root = &root_drmcs.drmcs;
+ struct cgroup_subsys_state *node;
+ unsigned int period_us;
+
+ mutex_lock(&drmcg_mutex);
+
+ rcu_read_lock();
+
+ if (WARN_ON_ONCE(!css_tryget_online(&root->css))) {
+ rcu_read_unlock();
+ mutex_unlock(&drmcg_mutex);
+ return;
+ }
+
+ period_us = ktime_to_us(ktime_sub(ktime_get(),
+ root_drmcs.prev_timestamp));
+
+ /*
+ * 1st pass - reset working values and update hierarchical weights and
+ * GPU utilisation.
+ */
+ if (!__start_scanning(period_us))
+ goto out_retry; /*
+ * Always come back later if scanner races with
+ * core cgroup management. (Repeated pattern.)
+ */
+
+ css_for_each_descendant_pre(node, &root->css) {
+ struct drm_cgroup_state *drmcs = css_to_drmcs(node);
+ struct cgroup_subsys_state *css;
+ u64 reused_us = 0, unused_us = 0;
+ unsigned int over_weights = 0;
+
+ if (!css_tryget_online(node))
+ goto out_retry;
+
+ /*
+ * 2nd pass - calculate initial budgets, mark over budget
+ * siblings and add up unused budget for the group.
+ */
+ css_for_each_child(css, &drmcs->css) {
+ struct drm_cgroup_state *sibling = css_to_drmcs(css);
+
+ if (!css_tryget_online(css)) {
+ css_put(node);
+ goto out_retry;
+ }
+
+ sibling->per_s_budget_us =
+ DIV_ROUND_UP_ULL(drmcs->per_s_budget_us *
+ sibling->weight,
+ drmcs->sum_children_weights);
+
+ sibling->over = sibling->active_us >
+ sibling->per_s_budget_us;
+ if (sibling->over)
+ over_weights += sibling->weight;
+ else
+ unused_us += sibling->per_s_budget_us -
+ sibling->active_us;
+
+ css_put(css);
+ }
+
+ /*
+ * 3rd pass - spread unused budget according to relative weights
+ * of over budget siblings.
+ */
+ while (over_weights && reused_us < unused_us) {
+ unsigned int under = 0;
+
+ unused_us -= reused_us;
+ reused_us = 0;
+
+ css_for_each_child(css, &drmcs->css) {
+ struct drm_cgroup_state *sibling;
+ u64 extra_us, max_us, need_us;
+
+ if (!css_tryget_online(css)) {
+ css_put(node);
+ goto out_retry;
+ }
+
+ sibling = css_to_drmcs(css);
+ if (!sibling->over) {
+ css_put(css);
+ continue;
+ }
+
+ extra_us = DIV_ROUND_UP_ULL(unused_us *
+ sibling->weight,
+ over_weights);
+ max_us = sibling->per_s_budget_us + extra_us;
+ if (max_us > sibling->active_us)
+ need_us = sibling->active_us -
+ sibling->per_s_budget_us;
+ else
+ need_us = extra_us;
+ reused_us += need_us;
+ sibling->per_s_budget_us += need_us;
+ sibling->over = sibling->active_us >
+ sibling->per_s_budget_us;
+ if (!sibling->over)
+ under += sibling->weight;
+
+ css_put(css);
+ }
+
+ over_weights -= under;
+ }
+
+ css_put(node);
+ }
+
+ /*
+ * 4th pass - send out over/under budget notifications.
+ */
+ css_for_each_descendant_post(node, &root->css) {
+ struct drm_cgroup_state *drmcs = css_to_drmcs(node);
+
+ if (!css_tryget_online(node))
+ goto out_retry;
+
+ if (drmcs->over || drmcs->over_budget)
+ drmcs_signal_budget(drmcs,
+ drmcs->active_us,
+ drmcs->per_s_budget_us);
+ drmcs->over_budget = drmcs->over;
+
+ css_put(node);
+ }
+
+out_retry:
+ rcu_read_unlock();
+ mutex_unlock(&drmcg_mutex);
+
+ period_us = READ_ONCE(root_drmcs.period_us);
+ if (period_us)
+ schedule_delayed_work(&root_drmcs.scan_work,
+ usecs_to_jiffies(period_us));
+
+ css_put(&root->css);
+}
+
static void drmcs_free(struct cgroup_subsys_state *css)
{
- struct drm_cgroup_state *drmcs = css_to_drmcs(css);
+ if (css != &root_drmcs.drmcs.css)
+ kfree(css_to_drmcs(css));
+}
- if (drmcs != &root_drmcs.drmcs)
- kfree(drmcs);
+static void record_baseline_utilisation(void)
+{
+ /* Re-capture baseline group GPU times to avoid downward jumps. */
+ WARN_ON_ONCE(!__start_scanning(0)); /* QQQ Retry if it fails? */
}
static struct cgroup_subsys_state *
@@ -82,6 +356,7 @@ drmcs_alloc(struct cgroup_subsys_state *parent_css)
if (!parent_css) {
drmcs = &root_drmcs.drmcs;
+ INIT_DELAYED_WORK(&root_drmcs.scan_work, scan_worker);
} else {
drmcs = kzalloc(sizeof(*drmcs), GFP_KERNEL);
if (!drmcs)
@@ -90,9 +365,124 @@ drmcs_alloc(struct cgroup_subsys_state *parent_css)
INIT_LIST_HEAD(&drmcs->clients);
}
+ drmcs->weight = CGROUP_WEIGHT_DFL;
+
return &drmcs->css;
}
+static int drmcs_online(struct cgroup_subsys_state *css)
+{
+ if (css == &root_drmcs.drmcs.css && drmcg_period_ms) {
+ mutex_lock(&drmcg_mutex);
+ record_baseline_utilisation();
+ root_drmcs.period_us = max(500, drmcg_period_ms) * 1000;
+ mod_delayed_work(system_wq,
+ &root_drmcs.scan_work,
+ usecs_to_jiffies(root_drmcs.period_us));
+ mutex_unlock(&drmcg_mutex);
+ }
+
+ return 0;
+}
+
+static void drmcs_offline(struct cgroup_subsys_state *css)
+{
+ bool flush = false;
+
+ if (css != &root_drmcs.drmcs.css)
+ return;
+
+ mutex_lock(&drmcg_mutex);
+ if (root_drmcs.period_us) {
+ root_drmcs.period_us = 0;
+ cancel_delayed_work(&root_drmcs.scan_work);
+ flush = true;
+ }
+ mutex_unlock(&drmcg_mutex);
+
+ if (flush)
+ flush_delayed_work(&root_drmcs.scan_work);
+}
+
+static struct drm_cgroup_state *old_drmcs;
+
+static int drmcs_can_attach(struct cgroup_taskset *tset)
+{
+ struct cgroup_subsys_state *css;
+ struct task_struct *task;
+
+ /*
+ * QQQ
+ * Same passing of state via global as cpuset_can_attach to
+ * cpuset_attach. Always serialized?
+ */
+
+ task = cgroup_taskset_first(tset, &css);
+ old_drmcs = css_to_drmcs(task_css(task, drm_cgrp_id));
+
+ return 0;
+}
+
+static void drmcs_attach(struct cgroup_taskset *tset)
+{
+ struct drm_cgroup_state *old = old_drmcs;
+ struct cgroup_subsys_state *css;
+ struct drm_file *fpriv, *next;
+ struct drm_cgroup_state *new;
+ struct task_struct *task;
+ bool migrated = false;
+
+ if (!old)
+ return;
+
+ task = cgroup_taskset_first(tset, &css);
+ new = css_to_drmcs(task_css(task, drm_cgrp_id));
+ if (new == old)
+ return;
+
+ mutex_lock(&drmcg_mutex);
+
+ list_for_each_entry_safe(fpriv, next, &old->clients, clink) {
+ cgroup_taskset_for_each(task, css, tset) {
+ struct cgroup_subsys_state *old_css;
+
+ if (task->flags & PF_KTHREAD)
+ continue;
+ if (!thread_group_leader(task))
+ continue;
+
+ new = css_to_drmcs(task_css(task, drm_cgrp_id));
+ if (WARN_ON_ONCE(new == old))
+ continue;
+
+ if (rcu_access_pointer(fpriv->pid) != task_tgid(task))
+ continue;
+
+ if (WARN_ON_ONCE(fpriv->__css != &old->css))
+ continue;
+
+ old_css = fpriv->__css;
+ fpriv->__css = &new->css;
+ css_get(fpriv->__css);
+ list_move_tail(&fpriv->clink, &new->clients);
+ css_put(old_css);
+ migrated = true;
+ }
+ }
+
+ if (migrated)
+ record_baseline_utilisation();
+
+ mutex_unlock(&drmcg_mutex);
+
+ old_drmcs = NULL;
+}
+
+static void drmcs_cancel_attach(struct cgroup_taskset *tset)
+{
+ old_drmcs = NULL;
+}
+
void drmcgroup_client_open(struct drm_file *file_priv)
{
struct drm_cgroup_state *drmcs;
@@ -121,6 +511,7 @@ void drmcgroup_client_close(struct drm_file *file_priv)
mutex_lock(&drmcg_mutex);
list_del(&file_priv->clink);
file_priv->__css = NULL;
+ record_baseline_utilisation();
mutex_unlock(&drmcg_mutex);
css_put(&drmcs->css);
@@ -144,6 +535,7 @@ void drmcgroup_client_migrate(struct drm_file *file_priv)
if (src != dst) {
file_priv->__css = &dst->css; /* Keeps the reference. */
list_move_tail(&file_priv->clink, &dst->clients);
+ record_baseline_utilisation();
}
mutex_unlock(&drmcg_mutex);
@@ -153,12 +545,23 @@ void drmcgroup_client_migrate(struct drm_file *file_priv)
EXPORT_SYMBOL_GPL(drmcgroup_client_migrate);
struct cftype files[] = {
+ {
+ .name = "weight",
+ .flags = CFTYPE_NOT_ON_ROOT,
+ .read_u64 = drmcs_read_weight,
+ .write_u64 = drmcs_write_weight,
+ },
{ } /* Zero entry terminates. */
};
struct cgroup_subsys drm_cgrp_subsys = {
.css_alloc = drmcs_alloc,
.css_free = drmcs_free,
+ .css_online = drmcs_online,
+ .css_offline = drmcs_offline,
+ .can_attach = drmcs_can_attach,
+ .attach = drmcs_attach,
+ .cancel_attach = drmcs_cancel_attach,
.early_init = false,
.legacy_cftypes = files,
.dfl_cftypes = files,
--
2.39.2
^ permalink raw reply related [flat|nested] 39+ messages in thread* Re: [PATCH 12/17] cgroup/drm: Introduce weight based drm cgroup control
2023-07-12 11:46 ` [PATCH 12/17] cgroup/drm: Introduce weight based drm cgroup control Tvrtko Ursulin
@ 2023-07-21 22:17 ` Tejun Heo
[not found] ` <ZLsEEYDFlJZwrJiV-NiLfg/pYEd1N0TnZuCh8vA@public.gmane.org>
0 siblings, 1 reply; 39+ messages in thread
From: Tejun Heo @ 2023-07-21 22:17 UTC (permalink / raw)
To: Tvrtko Ursulin
Cc: Rob Clark, Kenny.Ho, Dave Airlie, Stéphane Marchesin,
Daniel Vetter, Intel-gfx, linux-kernel, dri-devel,
Christian König, Michal Koutný, Zefan Li,
Johannes Weiner, cgroups, T . J . Mercier
On Wed, Jul 12, 2023 at 12:46:00PM +0100, Tvrtko Ursulin wrote:
> +DRM scheduling soft limits
> +~~~~~~~~~~~~~~~~~~~~~~~~~~
Please don't say soft limits for this. It means something different for
memcg, so it gets really confusing. Call it "weight based CPU time control"
and maybe call the triggering points as thresholds.
Thanks.
--
tejun
^ permalink raw reply [flat|nested] 39+ messages in thread
* [PATCH 13/17] drm/i915: Wire up with drm controller GPU time query
2023-07-12 11:45 [RFC v5 00/17] DRM cgroup controller with scheduling control and memory stats Tvrtko Ursulin
` (11 preceding siblings ...)
2023-07-12 11:46 ` [PATCH 12/17] cgroup/drm: Introduce weight based drm cgroup control Tvrtko Ursulin
@ 2023-07-12 11:46 ` Tvrtko Ursulin
2023-07-12 11:46 ` [PATCH 14/17] drm/i915: Implement cgroup controller over budget throttling Tvrtko Ursulin
` (4 subsequent siblings)
17 siblings, 0 replies; 39+ messages in thread
From: Tvrtko Ursulin @ 2023-07-12 11:46 UTC (permalink / raw)
To: Intel-gfx, dri-devel
Cc: Rob Clark, Kenny.Ho, Daniel Vetter, Johannes Weiner, linux-kernel,
Stéphane Marchesin, Christian König, Zefan Li,
Dave Airlie, Tejun Heo, cgroups, T . J . Mercier
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Implement the drm_cgroup_ops->active_time_us callback.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
drivers/gpu/drm/i915/i915_driver.c | 8 ++
drivers/gpu/drm/i915/i915_drm_client.c | 116 ++++++++++++++++++-------
drivers/gpu/drm/i915/i915_drm_client.h | 2 +
3 files changed, 94 insertions(+), 32 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c
index 294b022de22b..62a544d17659 100644
--- a/drivers/gpu/drm/i915/i915_driver.c
+++ b/drivers/gpu/drm/i915/i915_driver.c
@@ -1791,6 +1791,12 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
DRM_IOCTL_DEF_DRV(I915_GEM_VM_DESTROY, i915_gem_vm_destroy_ioctl, DRM_RENDER_ALLOW),
};
+#ifdef CONFIG_CGROUP_DRM
+static const struct drm_cgroup_ops i915_drm_cgroup_ops = {
+ .active_time_us = i915_drm_cgroup_get_active_time_us,
+};
+#endif
+
/*
* Interface history:
*
@@ -1820,6 +1826,8 @@ static const struct drm_driver i915_drm_driver = {
.postclose = i915_driver_postclose,
.show_fdinfo = PTR_IF(IS_ENABLED(CONFIG_PROC_FS), i915_drm_client_fdinfo),
+ .cg_ops = PTR_IF(IS_ENABLED(CONFIG_CGROUP_DRM), &i915_drm_cgroup_ops),
+
.gem_prime_import = i915_gem_prime_import,
.dumb_create = i915_gem_dumb_create,
diff --git a/drivers/gpu/drm/i915/i915_drm_client.c b/drivers/gpu/drm/i915/i915_drm_client.c
index 9e7a6075ee25..c3298beb094a 100644
--- a/drivers/gpu/drm/i915/i915_drm_client.c
+++ b/drivers/gpu/drm/i915/i915_drm_client.c
@@ -44,6 +44,86 @@ void __i915_drm_client_free(struct kref *kref)
kfree(client);
}
+#if defined(CONFIG_PROC_FS) || defined(CONFIG_CGROUP_DRM)
+static const char * const uabi_class_names[] = {
+ [I915_ENGINE_CLASS_RENDER] = "render",
+ [I915_ENGINE_CLASS_COPY] = "copy",
+ [I915_ENGINE_CLASS_VIDEO] = "video",
+ [I915_ENGINE_CLASS_VIDEO_ENHANCE] = "video-enhance",
+ [I915_ENGINE_CLASS_COMPUTE] = "compute",
+};
+
+static u64 busy_add(struct i915_gem_context *ctx, unsigned int class)
+{
+ struct i915_gem_engines_iter it;
+ struct intel_context *ce;
+ u64 total = 0;
+
+ for_each_gem_engine(ce, rcu_dereference(ctx->engines), it) {
+ if (ce->engine->uabi_class != class)
+ continue;
+
+ total += intel_context_get_total_runtime_ns(ce);
+ }
+
+ return total;
+}
+
+static u64 get_class_active_ns(struct i915_drm_client *client,
+ struct drm_i915_private *i915,
+ unsigned int class,
+ unsigned int *capacity)
+{
+ struct i915_gem_context *ctx;
+ u64 total;
+
+ *capacity = i915->engine_uabi_class_count[class];
+ if (!*capacity)
+ return 0;
+
+ total = atomic64_read(&client->past_runtime[class]);
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(ctx, &client->ctx_list, client_link)
+ total += busy_add(ctx, class);
+ rcu_read_unlock();
+
+ return total;
+}
+
+static bool supports_stats(struct drm_i915_private *i915)
+{
+ return GRAPHICS_VER(i915) >= 8;
+}
+#endif
+
+#if defined(CONFIG_CGROUP_DRM)
+u64 i915_drm_cgroup_get_active_time_us(struct drm_file *file)
+{
+ struct drm_i915_file_private *fpriv = file->driver_priv;
+ struct i915_drm_client *client = fpriv->client;
+ struct drm_i915_private *i915 = fpriv->i915;
+ unsigned int i;
+ u64 busy = 0;
+
+ if (!supports_stats(i915))
+ return 0;
+
+ for (i = 0; i < ARRAY_SIZE(uabi_class_names); i++) {
+ unsigned int capacity;
+ u64 b;
+
+ b = get_class_active_ns(client, i915, i, &capacity);
+ if (capacity) {
+ b = DIV_ROUND_UP_ULL(b, capacity * 1000);
+ busy += b;
+ }
+ }
+
+ return busy;
+}
+#endif
+
#ifdef CONFIG_PROC_FS
static void
obj_meminfo(struct drm_i915_gem_object *obj,
@@ -128,44 +208,16 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file)
mr->name);
}
-static const char * const uabi_class_names[] = {
- [I915_ENGINE_CLASS_RENDER] = "render",
- [I915_ENGINE_CLASS_COPY] = "copy",
- [I915_ENGINE_CLASS_VIDEO] = "video",
- [I915_ENGINE_CLASS_VIDEO_ENHANCE] = "video-enhance",
- [I915_ENGINE_CLASS_COMPUTE] = "compute",
-};
-
-static u64 busy_add(struct i915_gem_context *ctx, unsigned int class)
-{
- struct i915_gem_engines_iter it;
- struct intel_context *ce;
- u64 total = 0;
-
- for_each_gem_engine(ce, rcu_dereference(ctx->engines), it) {
- if (ce->engine->uabi_class != class)
- continue;
-
- total += intel_context_get_total_runtime_ns(ce);
- }
-
- return total;
-}
-
static void
show_client_class(struct drm_printer *p,
struct drm_i915_private *i915,
struct i915_drm_client *client,
unsigned int class)
{
- const unsigned int capacity = i915->engine_uabi_class_count[class];
- u64 total = atomic64_read(&client->past_runtime[class]);
- struct i915_gem_context *ctx;
+ unsigned int capacity;
+ u64 total;
- rcu_read_lock();
- list_for_each_entry_rcu(ctx, &client->ctx_list, client_link)
- total += busy_add(ctx, class);
- rcu_read_unlock();
+ total = get_class_active_ns(client, i915, class, &capacity);
if (capacity)
drm_printf(p, "drm-engine-%s:\t%llu ns\n",
@@ -191,7 +243,7 @@ void i915_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file)
show_meminfo(p, file);
- if (GRAPHICS_VER(i915) < 8)
+ if (!supports_stats(i915))
return;
for (i = 0; i < ARRAY_SIZE(uabi_class_names); i++)
diff --git a/drivers/gpu/drm/i915/i915_drm_client.h b/drivers/gpu/drm/i915/i915_drm_client.h
index 69cedfcd3d69..e0b143890e69 100644
--- a/drivers/gpu/drm/i915/i915_drm_client.h
+++ b/drivers/gpu/drm/i915/i915_drm_client.h
@@ -90,4 +90,6 @@ i915_drm_client_add_context_objects(struct i915_drm_client *client,
}
#endif
+u64 i915_drm_cgroup_get_active_time_us(struct drm_file *file);
+
#endif /* !__I915_DRM_CLIENT_H__ */
--
2.39.2
^ permalink raw reply related [flat|nested] 39+ messages in thread* [PATCH 14/17] drm/i915: Implement cgroup controller over budget throttling
2023-07-12 11:45 [RFC v5 00/17] DRM cgroup controller with scheduling control and memory stats Tvrtko Ursulin
` (12 preceding siblings ...)
2023-07-12 11:46 ` [PATCH 13/17] drm/i915: Wire up with drm controller GPU time query Tvrtko Ursulin
@ 2023-07-12 11:46 ` Tvrtko Ursulin
2023-07-12 11:46 ` [PATCH 15/17] cgroup/drm: Expose GPU utilisation Tvrtko Ursulin
` (3 subsequent siblings)
17 siblings, 0 replies; 39+ messages in thread
From: Tvrtko Ursulin @ 2023-07-12 11:46 UTC (permalink / raw)
To: Intel-gfx, dri-devel
Cc: Rob Clark, Kenny.Ho, Daniel Vetter, Johannes Weiner, linux-kernel,
Stéphane Marchesin, Christian König, Zefan Li,
Dave Airlie, Tejun Heo, cgroups, T . J . Mercier
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
When notified by the drm core we are over our allotted time budget, i915
instance will check if any of the GPU engines it is responsible for is
fully saturated. If it is, and the client in question is using that
engine, it will throttle it.
For now throttling is done simplistically by lowering the scheduling
priority while clients are throttled.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
.../gpu/drm/i915/gem/i915_gem_execbuffer.c | 38 ++++-
drivers/gpu/drm/i915/i915_driver.c | 1 +
drivers/gpu/drm/i915/i915_drm_client.c | 133 ++++++++++++++++++
drivers/gpu/drm/i915/i915_drm_client.h | 9 ++
4 files changed, 180 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index d3208a325614..047628769aa0 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -3086,6 +3086,42 @@ static void retire_requests(struct intel_timeline *tl, struct i915_request *end)
break;
}
+#ifdef CONFIG_CGROUP_DRM
+static unsigned int
+__get_class(struct drm_i915_file_private *fpriv, const struct i915_request *rq)
+{
+ unsigned int class;
+
+ class = rq->context->engine->uabi_class;
+
+ if (WARN_ON_ONCE(class >= ARRAY_SIZE(fpriv->client->throttle)))
+ class = 0;
+
+ return class;
+}
+
+static void copy_priority(struct i915_sched_attr *attr,
+ const struct i915_execbuffer *eb,
+ const struct i915_request *rq)
+{
+ struct drm_i915_file_private *file_priv = eb->file->driver_priv;
+ int prio;
+
+ *attr = eb->gem_context->sched;
+
+ prio = file_priv->client->throttle[__get_class(file_priv, rq)];
+ if (prio)
+ attr->priority = prio;
+}
+#else
+static void copy_priority(struct i915_sched_attr *attr,
+ const struct i915_execbuffer *eb,
+ const struct i915_request *rq)
+{
+ *attr = eb->gem_context->sched;
+}
+#endif
+
static int eb_request_add(struct i915_execbuffer *eb, struct i915_request *rq,
int err, bool last_parallel)
{
@@ -3102,7 +3138,7 @@ static int eb_request_add(struct i915_execbuffer *eb, struct i915_request *rq,
/* Check that the context wasn't destroyed before submission */
if (likely(!intel_context_is_closed(eb->context))) {
- attr = eb->gem_context->sched;
+ copy_priority(&attr, eb, rq);
} else {
/* Serialise with context_close via the add_to_timeline */
i915_request_set_error_once(rq, -ENOENT);
diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c
index 62a544d17659..3b9d47c2097b 100644
--- a/drivers/gpu/drm/i915/i915_driver.c
+++ b/drivers/gpu/drm/i915/i915_driver.c
@@ -1794,6 +1794,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
#ifdef CONFIG_CGROUP_DRM
static const struct drm_cgroup_ops i915_drm_cgroup_ops = {
.active_time_us = i915_drm_cgroup_get_active_time_us,
+ .signal_budget = i915_drm_cgroup_signal_budget,
};
#endif
diff --git a/drivers/gpu/drm/i915/i915_drm_client.c b/drivers/gpu/drm/i915/i915_drm_client.c
index c3298beb094a..9be007b10523 100644
--- a/drivers/gpu/drm/i915/i915_drm_client.c
+++ b/drivers/gpu/drm/i915/i915_drm_client.c
@@ -4,6 +4,7 @@
*/
#include <linux/kernel.h>
+#include <linux/ktime.h>
#include <linux/slab.h>
#include <linux/types.h>
@@ -122,6 +123,138 @@ u64 i915_drm_cgroup_get_active_time_us(struct drm_file *file)
return busy;
}
+
+int i915_drm_cgroup_signal_budget(struct drm_file *file, u64 usage, u64 budget)
+{
+ struct drm_i915_file_private *fpriv = file->driver_priv;
+ u64 class_usage[I915_LAST_UABI_ENGINE_CLASS + 1];
+ u64 class_last[I915_LAST_UABI_ENGINE_CLASS + 1];
+ struct i915_drm_client *client = fpriv->client;
+ struct drm_i915_private *i915 = fpriv->i915;
+ struct intel_engine_cs *engine;
+ bool over = usage > budget;
+ struct task_struct *task;
+ struct pid *pid;
+ unsigned int i;
+ ktime_t unused;
+ int ret = 0;
+ u64 t;
+
+ if (!supports_stats(i915))
+ return -EINVAL;
+
+ if (usage == 0 && budget == 0)
+ return 0;
+
+ rcu_read_lock();
+ pid = rcu_dereference(file->pid);
+ task = pid_task(pid, PIDTYPE_TGID);
+ if (over) {
+ client->over_budget++;
+ if (!client->over_budget)
+ client->over_budget = 2;
+
+ drm_dbg(&i915->drm, "%s[%u] over budget (%llu/%llu)\n",
+ task ? task->comm : "<unknown>", pid_vnr(pid),
+ usage, budget);
+ } else {
+ client->over_budget = 0;
+ memset(client->class_last, 0, sizeof(client->class_last));
+ memset(client->throttle, 0, sizeof(client->throttle));
+
+ drm_dbg(&i915->drm, "%s[%u] un-throttled; under budget\n",
+ task ? task->comm : "<unknown>", pid_vnr(pid));
+
+ rcu_read_unlock();
+ return 0;
+ }
+ rcu_read_unlock();
+
+ memset(class_usage, 0, sizeof(class_usage));
+ for_each_uabi_engine(engine, i915)
+ class_usage[engine->uabi_class] +=
+ ktime_to_ns(intel_engine_get_busy_time(engine, &unused));
+
+ memcpy(class_last, client->class_last, sizeof(class_last));
+ memcpy(client->class_last, class_usage, sizeof(class_last));
+
+ for (i = 0; i < ARRAY_SIZE(uabi_class_names); i++)
+ class_usage[i] -= class_last[i];
+
+ t = client->last;
+ client->last = ktime_get_raw_ns();
+ t = client->last - t;
+
+ if (client->over_budget == 1)
+ return 0;
+
+ for (i = 0; i < ARRAY_SIZE(uabi_class_names); i++) {
+ u64 client_class_usage[I915_LAST_UABI_ENGINE_CLASS + 1];
+ unsigned int capacity, rel_usage;
+
+ if (!i915->engine_uabi_class_count[i])
+ continue;
+
+ t = DIV_ROUND_UP_ULL(t, 1000);
+ class_usage[i] = DIV_ROUND_CLOSEST_ULL(class_usage[i], 1000);
+ rel_usage = DIV_ROUND_CLOSEST_ULL(class_usage[i] * 100ULL,
+ t *
+ i915->engine_uabi_class_count[i]);
+ if (rel_usage < 95) {
+ /* Physical class not oversubscribed. */
+ if (client->throttle[i]) {
+ client->throttle[i] = 0;
+
+ rcu_read_lock();
+ pid = rcu_dereference(file->pid);
+ task = pid_task(pid, PIDTYPE_TGID);
+ drm_dbg(&i915->drm,
+ "%s[%u] un-throttled; physical class %s utilisation %u%%\n",
+ task ? task->comm : "<unknown>",
+ pid_vnr(pid),
+ uabi_class_names[i],
+ rel_usage);
+ rcu_read_unlock();
+ }
+ continue;
+ }
+
+ client_class_usage[i] =
+ get_class_active_ns(client, i915, i, &capacity);
+ if (client_class_usage[i]) {
+ int permille;
+
+ ret |= 1;
+
+ permille = DIV_ROUND_CLOSEST_ULL((usage - budget) *
+ 1000,
+ budget);
+ client->throttle[i] =
+ DIV_ROUND_CLOSEST(permille *
+ I915_CONTEXT_MIN_USER_PRIORITY,
+ 1000);
+ if (client->throttle[i] <
+ I915_CONTEXT_MIN_USER_PRIORITY)
+ client->throttle[i] =
+ I915_CONTEXT_MIN_USER_PRIORITY;
+
+ rcu_read_lock();
+ pid = rcu_dereference(file->pid);
+ task = pid_task(pid, PIDTYPE_TGID);
+ drm_dbg(&i915->drm,
+ "%s[%u] %d‰ over budget, throttled to priority %d; physical class %s utilisation %u%%\n",
+ task ? task->comm : "<unknown>",
+ pid_vnr(pid),
+ permille,
+ client->throttle[i],
+ uabi_class_names[i],
+ rel_usage);
+ rcu_read_unlock();
+ }
+ }
+
+ return ret;
+}
#endif
#ifdef CONFIG_PROC_FS
diff --git a/drivers/gpu/drm/i915/i915_drm_client.h b/drivers/gpu/drm/i915/i915_drm_client.h
index e0b143890e69..6eadc9596b8f 100644
--- a/drivers/gpu/drm/i915/i915_drm_client.h
+++ b/drivers/gpu/drm/i915/i915_drm_client.h
@@ -47,6 +47,13 @@ struct i915_drm_client {
* @past_runtime: Accumulation of pphwsp runtimes from closed contexts.
*/
atomic64_t past_runtime[I915_LAST_UABI_ENGINE_CLASS + 1];
+
+#ifdef CONFIG_CGROUP_DRM
+ int throttle[I915_LAST_UABI_ENGINE_CLASS + 1];
+ unsigned int over_budget;
+ u64 last;
+ u64 class_last[I915_LAST_UABI_ENGINE_CLASS + 1];
+#endif
};
static inline struct i915_drm_client *
@@ -91,5 +98,7 @@ i915_drm_client_add_context_objects(struct i915_drm_client *client,
#endif
u64 i915_drm_cgroup_get_active_time_us(struct drm_file *file);
+int i915_drm_cgroup_signal_budget(struct drm_file *file,
+ u64 usage, u64 budget);
#endif /* !__I915_DRM_CLIENT_H__ */
--
2.39.2
^ permalink raw reply related [flat|nested] 39+ messages in thread* [PATCH 15/17] cgroup/drm: Expose GPU utilisation
2023-07-12 11:45 [RFC v5 00/17] DRM cgroup controller with scheduling control and memory stats Tvrtko Ursulin
` (13 preceding siblings ...)
2023-07-12 11:46 ` [PATCH 14/17] drm/i915: Implement cgroup controller over budget throttling Tvrtko Ursulin
@ 2023-07-12 11:46 ` Tvrtko Ursulin
[not found] ` <20230712114605.519432-16-tvrtko.ursulin-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
2023-07-12 11:46 ` [PATCH 16/17] cgroup/drm: Expose memory stats Tvrtko Ursulin
` (2 subsequent siblings)
17 siblings, 1 reply; 39+ messages in thread
From: Tvrtko Ursulin @ 2023-07-12 11:46 UTC (permalink / raw)
To: Intel-gfx, dri-devel
Cc: Rob Clark, Kenny.Ho, Daniel Vetter, Eero Tamminen,
Johannes Weiner, linux-kernel, Stéphane Marchesin,
Christian König, Zefan Li, Dave Airlie, Tejun Heo, cgroups,
T . J . Mercier
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
To support container use cases where external orchestrators want to make
deployment and migration decisions based on GPU load and capacity, we can
expose the GPU load as seen by the controller in a new drm.active_us
field. This field contains a monotonic cumulative time cgroup has spent
executing GPU loads, as reported by the DRM drivers being used by group
members.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Eero Tamminen <eero.t.tamminen@intel.com>
---
Documentation/admin-guide/cgroup-v2.rst | 3 +++
kernel/cgroup/drm.c | 26 ++++++++++++++++++++++++-
2 files changed, 28 insertions(+), 1 deletion(-)
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index da350858c59f..bbe986366f4a 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -2445,6 +2445,9 @@ will be respected.
DRM scheduling soft limits interface files
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ drm.active_us
+ GPU time used by the group recursively including all child groups.
+
drm.weight
Standard cgroup weight based control [1, 10000] used to configure the
relative distribution of GPU time between the sibling groups.
diff --git a/kernel/cgroup/drm.c b/kernel/cgroup/drm.c
index b244e3d828cc..7c20d4ebc634 100644
--- a/kernel/cgroup/drm.c
+++ b/kernel/cgroup/drm.c
@@ -25,6 +25,8 @@ struct drm_cgroup_state {
bool over;
bool over_budget;
+ u64 total_us;
+
u64 per_s_budget_us;
u64 prev_active_us;
u64 active_us;
@@ -117,6 +119,20 @@ drmcs_write_weight(struct cgroup_subsys_state *css, struct cftype *cftype,
return 0;
}
+static u64
+drmcs_read_total_us(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+ struct drm_cgroup_state *drmcs = css_to_drmcs(css);
+ u64 val;
+
+ /* Mutex being overkill unless arch cannot atomically read u64.. */
+ mutex_lock(&drmcg_mutex);
+ val = drmcs->total_us;
+ mutex_unlock(&drmcg_mutex);
+
+ return val;
+}
+
static bool __start_scanning(unsigned int period_us)
{
struct drm_cgroup_state *root = &root_drmcs.drmcs;
@@ -169,11 +185,14 @@ static bool __start_scanning(unsigned int period_us)
parent = css_to_drmcs(node->parent);
active = drmcs_get_active_time_us(drmcs);
- if (period_us && active > drmcs->prev_active_us)
+ if (period_us && active > drmcs->prev_active_us) {
drmcs->active_us += active - drmcs->prev_active_us;
+ drmcs->total_us += drmcs->active_us;
+ }
drmcs->prev_active_us = active;
parent->active_us += drmcs->active_us;
+ parent->total_us += drmcs->active_us;
parent->sum_children_weights += drmcs->weight;
css_put(node);
@@ -551,6 +570,11 @@ struct cftype files[] = {
.read_u64 = drmcs_read_weight,
.write_u64 = drmcs_write_weight,
},
+ {
+ .name = "active_us",
+ .flags = CFTYPE_NOT_ON_ROOT,
+ .read_u64 = drmcs_read_total_us,
+ },
{ } /* Zero entry terminates. */
};
--
2.39.2
^ permalink raw reply related [flat|nested] 39+ messages in thread* [PATCH 16/17] cgroup/drm: Expose memory stats
2023-07-12 11:45 [RFC v5 00/17] DRM cgroup controller with scheduling control and memory stats Tvrtko Ursulin
` (14 preceding siblings ...)
2023-07-12 11:46 ` [PATCH 15/17] cgroup/drm: Expose GPU utilisation Tvrtko Ursulin
@ 2023-07-12 11:46 ` Tvrtko Ursulin
[not found] ` <20230712114605.519432-17-tvrtko.ursulin-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>
2023-07-12 11:46 ` [PATCH 17/17] drm/i915: Wire up to the drm cgroup " Tvrtko Ursulin
2023-07-19 20:31 ` [RFC v5 00/17] DRM cgroup controller with scheduling control and " T.J. Mercier
17 siblings, 1 reply; 39+ messages in thread
From: Tvrtko Ursulin @ 2023-07-12 11:46 UTC (permalink / raw)
To: Intel-gfx, dri-devel
Cc: Rob Clark, Kenny.Ho, Daniel Vetter, Eero Tamminen,
Johannes Weiner, linux-kernel, Stéphane Marchesin,
Christian König, Zefan Li, Dave Airlie, Tejun Heo, cgroups,
T . J . Mercier
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
With a few DRM drivers exposing per client memory stats via the fdinfo
interface already, we can add support for exposing the same data (albeit
aggregated for cgroup hierarchies) via the drm cgroup controller.
Add some driver callbacks and controller code to use them, walking the
sub-tree, collating the numbers, and presenting them in a new field
named drm.memory.stat.
Example file content:
$ cat drm.memory.stat
card0 region=system total=12898304 shared=0 active=0 resident=12111872 purgeable=167936
card0 region=stolen-system total=0 shared=0 active=0 resident=0 purgeable=0
Data is generated on demand for simplicity of implementation, i.e. no running
totals are kept or accounted during migrations and such. Various
optimisations such as cheaper collection of data are possible but
deliberately left out for now.
Overall, the feature is deemed to be useful to container orchestration
software (and manual management).
Limits, either soft or hard, are not envisaged to be implemented on top of
this approach due to the on-demand nature of collecting the stats.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Eero Tamminen <eero.t.tamminen@intel.com>
---
Documentation/admin-guide/cgroup-v2.rst | 22 ++++
include/drm/drm_drv.h | 61 ++++++++++
kernel/cgroup/drm.c | 149 ++++++++++++++++++++++++
3 files changed, 232 insertions(+)
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index bbe986366f4a..1891c7d98206 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -2452,6 +2452,28 @@ DRM scheduling soft limits interface files
Standard cgroup weight based control [1, 10000] used to configure the
relative distribution of GPU time between the sibling groups.
+ drm.memory.stat
+ A nested file containing cumulative memory statistics for the whole
+ sub-hierarchy, broken down into separate GPUs and separate memory
+ regions supported by the latter.
+
+ For example::
+
+ $ cat drm.memory.stat
+ card0 region=system total=12898304 shared=0 active=0 resident=12111872 purgeable=167936
+ card0 region=stolen-system total=0 shared=0 active=0 resident=0 purgeable=0
+
+ Card designation corresponds to the DRM device names and multiple line
+ entries can be present per card.
+
+ Memory region names should be expected to be driver specific with the
+ exception of 'system' which is standardised and applicable for GPUs
+ which can operate on system memory buffers.
+
+ Sub-keys 'resident' and 'purgeable' are optional.
+
+ Per category region usage is reported in bytes.
+
Misc
----
diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h
index 29e11a87bf75..2ea9a46b5031 100644
--- a/include/drm/drm_drv.h
+++ b/include/drm/drm_drv.h
@@ -41,6 +41,7 @@ struct drm_minor;
struct dma_buf;
struct dma_buf_attachment;
struct drm_display_mode;
+struct drm_memory_stats;
struct drm_mode_create_dumb;
struct drm_printer;
struct sg_table;
@@ -175,6 +176,66 @@ struct drm_cgroup_ops {
* messages sent by the DRM cgroup controller.
*/
int (*signal_budget) (struct drm_file *, u64 used, u64 budget);
+
+
+ /**
+ * @num_memory_regions:
+ *
+ * Optional callback reporting the number of memory regions driver
+ * supports.
+ *
+ * Callback is allowed to report a larger number than present memory
+ * regions, but @memory_region_name is then supposed to return NULL for
+ * those indices.
+ *
+ * Used by the DRM core when queried by the DRM cgroup controller.
+ *
+ * All three callbacks of @num_memory_regions, @memory_region_name and
+ * @memory_stats need to be implemented for DRM cgroup memory stats
+ * support.
+ */
+ unsigned int (*num_memory_regions) (const struct drm_device *);
+
+ /**
+ * @memory_region_name:
+ *
+ * Optional callback reporting the name of the queried memory region.
+ *
+ * Can be NULL if the memory region index is not supported by the
+ * passed in device.
+ *
+ * Used by the DRM core when queried by the DRM cgroup controller.
+ *
+ * All three callbacks of @num_memory_regions, @memory_region_name and
+ * @memory_stats need to be implemented for DRM cgroup memory stats
+ * support.
+ */
+ const char * (*memory_region_name) (const struct drm_device *,
+ unsigned int index);
+
+ /**
+ * @memory_stats:
+ *
+ * Optional callback adding to the passed in array of struct
+ * drm_memory_stats objects.
+ *
+ * Number of objects in the array is passed in the @num argument.
+ *
+ * Returns a bitmask of supported enum drm_gem_object_status by the
+ * driver instance.
+ *
+ * Callback is only allowed to add to the existing fields and should
+ * never clear them.
+ *
+ * Used by the DRM core when queried by the DRM cgroup controller.
+ *
+ * All three callbacks of @num_memory_regions, @memory_region_name and
+ * @memory_stats need to be implemented for DRM cgroup memory stats
+ * support.
+ */
+ unsigned int (*memory_stats) (struct drm_file *,
+ struct drm_memory_stats *,
+ unsigned int num);
};
/**
diff --git a/kernel/cgroup/drm.c b/kernel/cgroup/drm.c
index 7c20d4ebc634..22fc180dd659 100644
--- a/kernel/cgroup/drm.c
+++ b/kernel/cgroup/drm.c
@@ -5,6 +5,7 @@
#include <linux/cgroup.h>
#include <linux/cgroup_drm.h>
+#include <linux/device.h>
#include <linux/list.h>
#include <linux/moduleparam.h>
#include <linux/mutex.h>
@@ -12,6 +13,8 @@
#include <linux/slab.h>
#include <drm/drm_drv.h>
+#include <drm/drm_file.h>
+#include <drm/drm_gem.h>
struct drm_cgroup_state {
struct cgroup_subsys_state css;
@@ -133,6 +136,147 @@ drmcs_read_total_us(struct cgroup_subsys_state *css, struct cftype *cft)
return val;
}
+struct drmcs_stat {
+ const struct drm_device *dev;
+ const struct drm_cgroup_ops *cg_ops;
+ const char *device_name;
+ unsigned int regions;
+ enum drm_gem_object_status flags;
+ struct drm_memory_stats *stats;
+};
+
+static int drmcs_seq_show_memory(struct seq_file *sf, void *v)
+{
+ struct cgroup_subsys_state *node;
+ struct drmcs_stat *stats = NULL;
+ unsigned int num_devices, i;
+ int ret;
+
+ /*
+ * We could avoid taking the cgroup_lock and just walk the tree under
+ * RCU but then allocating temporary storage becomes a problem. So for
+ * now keep it simple and take the lock.
+ */
+ cgroup_lock();
+
+ /* Protect against client migrations and clients disappearing. */
+ ret = mutex_lock_interruptible(&drmcg_mutex);
+ if (ret) {
+ cgroup_unlock();
+ return ret;
+ }
+
+ num_devices = 0;
+ css_for_each_descendant_pre(node, seq_css(sf)) {
+ struct drm_cgroup_state *drmcs = css_to_drmcs(node);
+ struct drm_file *fpriv;
+
+ list_for_each_entry(fpriv, &drmcs->clients, clink) {
+ const struct drm_cgroup_ops *cg_ops =
+ fpriv->minor->dev->driver->cg_ops;
+ const char *device_name = dev_name(fpriv->minor->kdev);
+ struct drmcs_stat *stat;
+ unsigned int regions;
+
+ /* Does this driver supports memory stats? */
+ if (cg_ops &&
+ cg_ops->num_memory_regions &&
+ cg_ops->memory_region_name &&
+ cg_ops->memory_stats)
+ regions =
+ cg_ops->num_memory_regions(fpriv->minor->dev);
+ else
+ regions = 0;
+
+ if (!regions)
+ continue;
+
+ /* Have we seen this device before? */
+ stat = NULL;
+ for (i = 0; i < num_devices; i++) {
+ if (!strcmp(stats[i].device_name,
+ device_name)) {
+ stat = &stats[i];
+ break;
+ }
+ }
+
+ /* If not allocate space for it. */
+ if (!stat) {
+ stats = krealloc_array(stats, num_devices + 1,
+ sizeof(*stats),
+ GFP_USER);
+ if (!stats) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ stat = &stats[num_devices++];
+ stat->dev = fpriv->minor->dev;
+ stat->cg_ops = cg_ops;
+ stat->device_name = device_name;
+ stat->flags = 0;
+ stat->regions = regions;
+ stat->stats =
+ kcalloc(regions,
+ sizeof(struct drm_memory_stats),
+ GFP_USER);
+ if (!stat->stats) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+
+ /* Accumulate the stats for this device+client. */
+ stat->flags |= cg_ops->memory_stats(fpriv,
+ stat->stats,
+ stat->regions);
+ }
+ }
+
+ for (i = 0; i < num_devices; i++) {
+ struct drmcs_stat *stat = &stats[i];
+ unsigned int j;
+
+ for (j = 0; j < stat->regions; j++) {
+ const char *name =
+ stat->cg_ops->memory_region_name(stat->dev, j);
+
+ if (!name)
+ continue;
+
+ seq_printf(sf,
+ "%s region=%s total=%llu shared=%llu active=%llu",
+ stat->device_name,
+ name,
+ stat->stats[j].private +
+ stat->stats[j].shared,
+ stat->stats[j].shared,
+ stat->stats[j].active);
+
+ if (stat->flags & DRM_GEM_OBJECT_RESIDENT)
+ seq_printf(sf, " resident=%llu",
+ stat->stats[j].resident);
+
+ if (stat->flags & DRM_GEM_OBJECT_PURGEABLE)
+ seq_printf(sf, " purgeable=%llu",
+ stat->stats[j].purgeable);
+
+ seq_puts(sf, "\n");
+ }
+ }
+
+out:
+ mutex_unlock(&drmcg_mutex);
+ cgroup_unlock();
+
+ for (i = 0; i < num_devices; i++)
+ kfree(stats[i].stats);
+ kfree(stats);
+
+ return ret;
+}
+
static bool __start_scanning(unsigned int period_us)
{
struct drm_cgroup_state *root = &root_drmcs.drmcs;
@@ -575,6 +719,11 @@ struct cftype files[] = {
.flags = CFTYPE_NOT_ON_ROOT,
.read_u64 = drmcs_read_total_us,
},
+ {
+ .name = "memory.stat",
+ .flags = CFTYPE_NOT_ON_ROOT,
+ .seq_show = drmcs_seq_show_memory,
+ },
{ } /* Zero entry terminates. */
};
--
2.39.2
^ permalink raw reply related [flat|nested] 39+ messages in thread* [PATCH 17/17] drm/i915: Wire up to the drm cgroup memory stats
2023-07-12 11:45 [RFC v5 00/17] DRM cgroup controller with scheduling control and memory stats Tvrtko Ursulin
` (15 preceding siblings ...)
2023-07-12 11:46 ` [PATCH 16/17] cgroup/drm: Expose memory stats Tvrtko Ursulin
@ 2023-07-12 11:46 ` Tvrtko Ursulin
2023-07-19 20:31 ` [RFC v5 00/17] DRM cgroup controller with scheduling control and " T.J. Mercier
17 siblings, 0 replies; 39+ messages in thread
From: Tvrtko Ursulin @ 2023-07-12 11:46 UTC (permalink / raw)
To: Intel-gfx, dri-devel
Cc: Rob Clark, Kenny.Ho, Daniel Vetter, Johannes Weiner, linux-kernel,
Stéphane Marchesin, Christian König, Zefan Li,
Dave Airlie, Tejun Heo, cgroups, T . J . Mercier
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Simply refactor the existing helpers which collate the data for fdinfo
and share them with thin drm cgroup controller callbacks.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
drivers/gpu/drm/i915/i915_driver.c | 4 +
drivers/gpu/drm/i915/i915_drm_client.c | 183 ++++++++++++++++---------
drivers/gpu/drm/i915/i915_drm_client.h | 11 +-
3 files changed, 129 insertions(+), 69 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c
index 3b9d47c2097b..a299edc9eb79 100644
--- a/drivers/gpu/drm/i915/i915_driver.c
+++ b/drivers/gpu/drm/i915/i915_driver.c
@@ -1795,6 +1795,10 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
static const struct drm_cgroup_ops i915_drm_cgroup_ops = {
.active_time_us = i915_drm_cgroup_get_active_time_us,
.signal_budget = i915_drm_cgroup_signal_budget,
+
+ .num_memory_regions = i915_drm_cgroup_num_memory_regions,
+ .memory_region_name = i915_drm_cgroup_memory_region_name,
+ .memory_stats = i915_drm_cgroup_memory_stats,
};
#endif
diff --git a/drivers/gpu/drm/i915/i915_drm_client.c b/drivers/gpu/drm/i915/i915_drm_client.c
index 9be007b10523..c54b1ac753c6 100644
--- a/drivers/gpu/drm/i915/i915_drm_client.c
+++ b/drivers/gpu/drm/i915/i915_drm_client.c
@@ -29,7 +29,7 @@ struct i915_drm_client *i915_drm_client_alloc(void)
kref_init(&client->kref);
spin_lock_init(&client->ctx_lock);
INIT_LIST_HEAD(&client->ctx_list);
-#ifdef CONFIG_PROC_FS
+#if defined(CONFIG_PROC_FS) || defined(CONFIG_CGROUP_DRM)
spin_lock_init(&client->objects_lock);
INIT_LIST_HEAD(&client->objects_list);
#endif
@@ -46,6 +46,89 @@ void __i915_drm_client_free(struct kref *kref)
}
#if defined(CONFIG_PROC_FS) || defined(CONFIG_CGROUP_DRM)
+static void
+obj_meminfo(struct drm_i915_gem_object *obj,
+ struct drm_memory_stats *stats,
+ unsigned int num)
+{
+ struct intel_memory_region *mr;
+ u64 sz = obj->base.size;
+ enum intel_region_id id;
+ unsigned int i;
+
+ /* Attribute size and shared to all possible memory regions. */
+ for (i = 0; i < obj->mm.n_placements; i++) {
+ mr = obj->mm.placements[i];
+ id = mr->id;
+
+ if (WARN_ON_ONCE(id >= num))
+ return;
+
+ if (obj->base.handle_count > 1)
+ stats[id].shared += sz;
+ else
+ stats[id].private += sz;
+ }
+
+ /* Attribute other categories to only the current region. */
+ mr = obj->mm.region;
+ if (mr)
+ id = mr->id;
+ else
+ id = INTEL_REGION_SMEM;
+
+ if (WARN_ON_ONCE(id >= num))
+ return;
+
+ if (!obj->mm.n_placements) {
+ if (obj->base.handle_count > 1)
+ stats[id].shared += sz;
+ else
+ stats[id].private += sz;
+ }
+
+ if (i915_gem_object_has_pages(obj)) {
+ stats[id].resident += sz;
+
+ if (!dma_resv_test_signaled(obj->base.resv,
+ dma_resv_usage_rw(true)))
+ stats[id].active += sz;
+ else if (i915_gem_object_is_shrinkable(obj) &&
+ obj->mm.madv == I915_MADV_DONTNEED)
+ stats[id].purgeable += sz;
+ }
+}
+
+static void
+memory_stats(struct drm_file *file,
+ struct drm_memory_stats *stats,
+ unsigned int num)
+{
+ struct drm_i915_file_private *fpriv = file->driver_priv;
+ struct i915_drm_client *client = fpriv->client;
+ struct drm_i915_gem_object *obj;
+ struct list_head *pos;
+ unsigned int id;
+
+ /* Public objects. */
+ spin_lock(&file->table_lock);
+ idr_for_each_entry(&file->object_idr, obj, id)
+ obj_meminfo(obj, stats, num);
+ spin_unlock(&file->table_lock);
+
+ /* Internal objects. */
+ rcu_read_lock();
+ list_for_each_rcu(pos, &client->objects_list) {
+ obj = i915_gem_object_get_rcu(list_entry(pos, typeof(*obj),
+ client_link));
+ if (!obj)
+ continue;
+ obj_meminfo(obj, stats, num);
+ i915_gem_object_put(obj);
+ }
+ rcu_read_unlock();
+}
+
static const char * const uabi_class_names[] = {
[I915_ENGINE_CLASS_RENDER] = "render",
[I915_ENGINE_CLASS_COPY] = "copy",
@@ -255,83 +338,47 @@ int i915_drm_cgroup_signal_budget(struct drm_file *file, u64 usage, u64 budget)
return ret;
}
+
+unsigned int i915_drm_cgroup_num_memory_regions(const struct drm_device *dev)
+{
+ return INTEL_REGION_UNKNOWN;
+}
+
+const char *i915_drm_cgroup_memory_region_name(const struct drm_device *dev,
+ unsigned int index)
+{
+ const struct drm_i915_private *i915 = to_i915(dev);
+
+ if (index < ARRAY_SIZE(i915->mm.regions)) {
+ struct intel_memory_region *mr = i915->mm.regions[index];
+
+ if (mr)
+ return mr->name;
+ }
+
+ return NULL;
+}
+
+unsigned int i915_drm_cgroup_memory_stats(struct drm_file *file,
+ struct drm_memory_stats *stats,
+ unsigned int num)
+{
+ memory_stats(file, stats, num);
+
+ return DRM_GEM_OBJECT_RESIDENT | DRM_GEM_OBJECT_PURGEABLE;
+}
#endif
#ifdef CONFIG_PROC_FS
-static void
-obj_meminfo(struct drm_i915_gem_object *obj,
- struct drm_memory_stats stats[INTEL_REGION_UNKNOWN])
-{
- struct intel_memory_region *mr;
- u64 sz = obj->base.size;
- enum intel_region_id id;
- unsigned int i;
-
- /* Attribute size and shared to all possible memory regions. */
- for (i = 0; i < obj->mm.n_placements; i++) {
- mr = obj->mm.placements[i];
- id = mr->id;
-
- if (obj->base.handle_count > 1)
- stats[id].shared += sz;
- else
- stats[id].private += sz;
- }
-
- /* Attribute other categories to only the current region. */
- mr = obj->mm.region;
- if (mr)
- id = mr->id;
- else
- id = INTEL_REGION_SMEM;
-
- if (!obj->mm.n_placements) {
- if (obj->base.handle_count > 1)
- stats[id].shared += sz;
- else
- stats[id].private += sz;
- }
-
- if (i915_gem_object_has_pages(obj)) {
- stats[id].resident += sz;
-
- if (!dma_resv_test_signaled(obj->base.resv,
- dma_resv_usage_rw(true)))
- stats[id].active += sz;
- else if (i915_gem_object_is_shrinkable(obj) &&
- obj->mm.madv == I915_MADV_DONTNEED)
- stats[id].purgeable += sz;
- }
-}
-
static void show_meminfo(struct drm_printer *p, struct drm_file *file)
{
struct drm_memory_stats stats[INTEL_REGION_UNKNOWN] = {};
struct drm_i915_file_private *fpriv = file->driver_priv;
- struct i915_drm_client *client = fpriv->client;
struct drm_i915_private *i915 = fpriv->i915;
- struct drm_i915_gem_object *obj;
struct intel_memory_region *mr;
- struct list_head *pos;
unsigned int id;
- /* Public objects. */
- spin_lock(&file->table_lock);
- idr_for_each_entry(&file->object_idr, obj, id)
- obj_meminfo(obj, stats);
- spin_unlock(&file->table_lock);
-
- /* Internal objects. */
- rcu_read_lock();
- list_for_each_rcu(pos, &client->objects_list) {
- obj = i915_gem_object_get_rcu(list_entry(pos, typeof(*obj),
- client_link));
- if (!obj)
- continue;
- obj_meminfo(obj, stats);
- i915_gem_object_put(obj);
- }
- rcu_read_unlock();
+ memory_stats(file, stats, ARRAY_SIZE(stats));
for_each_memory_region(mr, i915, id)
drm_print_memory_stats(p,
@@ -382,7 +429,9 @@ void i915_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file)
for (i = 0; i < ARRAY_SIZE(uabi_class_names); i++)
show_client_class(p, i915, file_priv->client, i);
}
+#endif
+#if defined(CONFIG_PROC_FS) || defined(CONFIG_CGROUP_DRM)
void i915_drm_client_add_object(struct i915_drm_client *client,
struct drm_i915_gem_object *obj)
{
diff --git a/drivers/gpu/drm/i915/i915_drm_client.h b/drivers/gpu/drm/i915/i915_drm_client.h
index 6eadc9596b8f..8b34be25e887 100644
--- a/drivers/gpu/drm/i915/i915_drm_client.h
+++ b/drivers/gpu/drm/i915/i915_drm_client.h
@@ -29,7 +29,7 @@ struct i915_drm_client {
spinlock_t ctx_lock; /* For add/remove from ctx_list. */
struct list_head ctx_list; /* List of contexts belonging to client. */
-#ifdef CONFIG_PROC_FS
+#if defined(CONFIG_PROC_FS) || defined(CONFIG_CGROUP_DRM)
/**
* @objects_lock: lock protecting @objects_list
*/
@@ -74,7 +74,7 @@ struct i915_drm_client *i915_drm_client_alloc(void);
void i915_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file);
-#ifdef CONFIG_PROC_FS
+#if defined(CONFIG_PROC_FS) || defined(CONFIG_CGROUP_DRM)
void i915_drm_client_add_object(struct i915_drm_client *client,
struct drm_i915_gem_object *obj);
bool i915_drm_client_remove_object(struct drm_i915_gem_object *obj);
@@ -101,4 +101,11 @@ u64 i915_drm_cgroup_get_active_time_us(struct drm_file *file);
int i915_drm_cgroup_signal_budget(struct drm_file *file,
u64 usage, u64 budget);
+unsigned int i915_drm_cgroup_num_memory_regions(const struct drm_device *);
+const char *i915_drm_cgroup_memory_region_name(const struct drm_device *,
+ unsigned int index);
+unsigned int i915_drm_cgroup_memory_stats(struct drm_file *,
+ struct drm_memory_stats *,
+ unsigned int num);
+
#endif /* !__I915_DRM_CLIENT_H__ */
--
2.39.2
^ permalink raw reply related [flat|nested] 39+ messages in thread* Re: [RFC v5 00/17] DRM cgroup controller with scheduling control and memory stats
2023-07-12 11:45 [RFC v5 00/17] DRM cgroup controller with scheduling control and memory stats Tvrtko Ursulin
` (16 preceding siblings ...)
2023-07-12 11:46 ` [PATCH 17/17] drm/i915: Wire up to the drm cgroup " Tvrtko Ursulin
@ 2023-07-19 20:31 ` T.J. Mercier
[not found] ` <CABdmKX1PUF+X897ZMOr0RNiYdoiL_2NkcSt+Eh55BfW-05LopQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
17 siblings, 1 reply; 39+ messages in thread
From: T.J. Mercier @ 2023-07-19 20:31 UTC (permalink / raw)
To: Tvrtko Ursulin
Cc: Rob Clark, Kenny.Ho, Dave Airlie, Stéphane Marchesin,
Daniel Vetter, Intel-gfx, linux-kernel, dri-devel, Zefan Li,
Johannes Weiner, Tejun Heo, cgroups, Eero Tamminen,
Christian König
On Wed, Jul 12, 2023 at 4:47 AM Tvrtko Ursulin
<tvrtko.ursulin@linux.intel.com> wrote:
>
> drm.memory.stat
> A nested file containing cumulative memory statistics for the whole
> sub-hierarchy, broken down into separate GPUs and separate memory
> regions supported by the latter.
>
> For example::
>
> $ cat drm.memory.stat
> card0 region=system total=12898304 shared=0 active=0 resident=12111872 purgeable=167936
> card0 region=stolen-system total=0 shared=0 active=0 resident=0 purgeable=0
>
> Card designation corresponds to the DRM device names and multiple line
> entries can be present per card.
>
> Memory region names should be expected to be driver specific with the
> exception of 'system' which is standardised and applicable for GPUs
> which can operate on system memory buffers.
>
> Sub-keys 'resident' and 'purgeable' are optional.
>
> Per category region usage is reported in bytes.
>
> * Feedback from people interested in drm.active_us and drm.memory.stat is
> required to understand the use cases and their usefulness (of the fields).
>
> Memory stats are something which was easy to add to my series, since I was
> already working on the fdinfo memory stats patches, but the question is how
> useful it is.
>
Hi Tvrtko,
I think this style of driver-defined categories for reporting of
memory could potentially allow us to eliminate the GPU memory tracking
tracepoint used on Android (gpu_mem_total). This would involve reading
drm.memory.stat at the root cgroup (I see it's currently disabled on
the root), which means traversing the whole cgroup tree under the
cgroup lock to generate the values on-demand. This would be done
rarely, but I still wonder what the cost of that would turn out to be.
The drm_memory_stats categories in the output don't seem like a big
value-add for this use-case, but no real objection to them being
there. I know it's called the DRM cgroup controller, but it'd be nice
if there were a way to make the mem tracking part work for any driver
that wishes to participate as many of our devices don't use a DRM
driver. But making that work doesn't look like it would fit very
cleanly into this controller, so I'll just shut up now.
Thanks!
-T.J.
^ permalink raw reply [flat|nested] 39+ messages in thread