public inbox for linux-modules@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH v17 37/47] completion, dept: introduce init_completion_dmap() API
From: Byungchul Park @ 2025-10-02  8:12 UTC (permalink / raw)
  To: linux-kernel
  Cc: kernel_team, torvalds, damien.lemoal, linux-ide, adilger.kernel,
	linux-ext4, mingo, peterz, will, tglx, rostedt, joel, sashal,
	daniel.vetter, duyuyang, johannes.berg, tj, tytso, willy, david,
	amir73il, gregkh, kernel-team, linux-mm, akpm, mhocko, minchan,
	hannes, vdavydov.dev, sj, jglisse, dennis, cl, penberg, rientjes,
	vbabka, ngupta, linux-block, josef, linux-fsdevel, jack, jlayton,
	dan.j.williams, hch, djwong, dri-devel, rodrigosiqueiramelo,
	melissa.srw, hamohammed.sa, harry.yoo, chris.p.wilson,
	gwan-gyeong.mun, max.byungchul.park, boqun.feng, longman,
	yunseong.kim, ysk, yeoreum.yun, netdev, matthew.brost, her0gyugyu,
	corbet, catalin.marinas, bp, dave.hansen, x86, hpa, luto,
	sumit.semwal, gustavo, christian.koenig, andi.shyti, arnd,
	lorenzo.stoakes, Liam.Howlett, rppt, surenb, mcgrof, petr.pavlu,
	da.gomez, samitolvanen, paulmck, frederic, neeraj.upadhyay,
	joelagnelf, josh, urezki, mathieu.desnoyers, jiangshanlai,
	qiang.zhang, juri.lelli, vincent.guittot, dietmar.eggemann,
	bsegall, mgorman, vschneid, chuck.lever, neil, okorniev, Dai.Ngo,
	tom, trondmy, anna, kees, bigeasy, clrkwllms, mark.rutland,
	ada.coupriediaz, kristina.martsenko, wangkefeng.wang, broonie,
	kevin.brodsky, dwmw, shakeel.butt, ast, ziy, yuzhao, baolin.wang,
	usamaarif642, joel.granados, richard.weiyang, geert+renesas,
	tim.c.chen, linux, alexander.shishkin, lillian, chenhuacai,
	francesco, guoweikang.kernel, link, jpoimboe, masahiroy, brauner,
	thomas.weissschuh, oleg, mjguzik, andrii, wangfushuai, linux-doc,
	linux-arm-kernel, linux-media, linaro-mm-sig, linux-i2c,
	linux-arch, linux-modules, rcu, linux-nfs, linux-rt-devel
In-Reply-To: <20251002081247.51255-1-byungchul@sk.com>

Currently, dept uses dept's map embedded in task_struct to track
dependencies related to wait_for_completion() and its family.  So it
doesn't need an explicit map basically.

However, for those who want to set the maps with customized class or
key, introduce a new API to use external maps.

Signed-off-by: Byungchul Park <byungchul@sk.com>
---
 include/linux/completion.h | 40 +++++++++++++++++++++-----------------
 1 file changed, 22 insertions(+), 18 deletions(-)

diff --git a/include/linux/completion.h b/include/linux/completion.h
index 4d8fb1d95c0a..e50f7d9b4b97 100644
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -27,17 +27,15 @@
 struct completion {
 	unsigned int done;
 	struct swait_queue_head wait;
+	struct dept_map *dmap;
 };
 
-#define init_completion(x)				\
-do {							\
-	__init_completion(x);				\
-} while (0)
+#define init_completion(x) init_completion_dmap(x, NULL)
 
 /*
- * XXX: No use cases for now. Fill the body when needed.
+ * XXX: This usage using lockdep's map should be deprecated.
  */
-#define init_completion_map(x, m) init_completion(x)
+#define init_completion_map(x, m) init_completion_dmap(x, NULL)
 
 static inline void complete_acquire(struct completion *x, long timeout)
 {
@@ -48,8 +46,11 @@ static inline void complete_release(struct completion *x)
 }
 
 #define COMPLETION_INITIALIZER(work) \
-	{ 0, __SWAIT_QUEUE_HEAD_INITIALIZER((work).wait), }
+	{ 0, __SWAIT_QUEUE_HEAD_INITIALIZER((work).wait), .dmap = NULL, }
 
+/*
+ * XXX: This usage using lockdep's map should be deprecated.
+ */
 #define COMPLETION_INITIALIZER_ONSTACK_MAP(work, map) \
 	(*({ init_completion_map(&(work), &(map)); &(work); }))
 
@@ -90,15 +91,18 @@ static inline void complete_release(struct completion *x)
 #endif
 
 /**
- * __init_completion - Initialize a dynamically allocated completion
+ * init_completion_dmap - Initialize a dynamically allocated completion
  * @x:  pointer to completion structure that is to be initialized
+ * @dmap:  pointer to external dept's map to be used as a separated map
  *
  * This inline function will initialize a dynamically created completion
  * structure.
  */
-static inline void __init_completion(struct completion *x)
+static inline void init_completion_dmap(struct completion *x,
+		struct dept_map *dmap)
 {
 	x->done = 0;
+	x->dmap = dmap;
 	init_swait_queue_head(&x->wait);
 }
 
@@ -136,13 +140,13 @@ extern void complete_all(struct completion *);
 
 #define wait_for_completion(x)						\
 ({									\
-	sdt_might_sleep_start_timeout(NULL, -1L);			\
+	sdt_might_sleep_start_timeout((x)->dmap, -1L);			\
 	__wait_for_completion(x);					\
 	sdt_might_sleep_end();						\
 })
 #define wait_for_completion_io(x)					\
 ({									\
-	sdt_might_sleep_start_timeout(NULL, -1L);			\
+	sdt_might_sleep_start_timeout((x)->dmap, -1L);			\
 	__wait_for_completion_io(x);					\
 	sdt_might_sleep_end();						\
 })
@@ -150,7 +154,7 @@ extern void complete_all(struct completion *);
 ({									\
 	int __ret;							\
 									\
-	sdt_might_sleep_start_timeout(NULL, -1L);			\
+	sdt_might_sleep_start_timeout((x)->dmap, -1L);			\
 	__ret = __wait_for_completion_interruptible(x);			\
 	sdt_might_sleep_end();						\
 	__ret;								\
@@ -159,7 +163,7 @@ extern void complete_all(struct completion *);
 ({									\
 	int __ret;							\
 									\
-	sdt_might_sleep_start_timeout(NULL, -1L);			\
+	sdt_might_sleep_start_timeout((x)->dmap, -1L);			\
 	__ret = __wait_for_completion_killable(x);			\
 	sdt_might_sleep_end();						\
 	__ret;								\
@@ -168,7 +172,7 @@ extern void complete_all(struct completion *);
 ({									\
 	int __ret;							\
 									\
-	sdt_might_sleep_start_timeout(NULL, -1L);			\
+	sdt_might_sleep_start_timeout((x)->dmap, -1L);			\
 	__ret = __wait_for_completion_state(x, s);			\
 	sdt_might_sleep_end();						\
 	__ret;								\
@@ -177,7 +181,7 @@ extern void complete_all(struct completion *);
 ({									\
 	unsigned long __ret;						\
 									\
-	sdt_might_sleep_start_timeout(NULL, t);				\
+	sdt_might_sleep_start_timeout((x)->dmap, t);			\
 	__ret = __wait_for_completion_timeout(x, t);			\
 	sdt_might_sleep_end();						\
 	__ret;								\
@@ -186,7 +190,7 @@ extern void complete_all(struct completion *);
 ({									\
 	unsigned long __ret;						\
 									\
-	sdt_might_sleep_start_timeout(NULL, t);				\
+	sdt_might_sleep_start_timeout((x)->dmap, t);			\
 	__ret = __wait_for_completion_io_timeout(x, t);			\
 	sdt_might_sleep_end();						\
 	__ret;								\
@@ -195,7 +199,7 @@ extern void complete_all(struct completion *);
 ({									\
 	long __ret;							\
 									\
-	sdt_might_sleep_start_timeout(NULL, t);				\
+	sdt_might_sleep_start_timeout((x)->dmap, t);			\
 	__ret = __wait_for_completion_interruptible_timeout(x, t);	\
 	sdt_might_sleep_end();						\
 	__ret;								\
@@ -204,7 +208,7 @@ extern void complete_all(struct completion *);
 ({									\
 	long __ret;							\
 									\
-	sdt_might_sleep_start_timeout(NULL, t);				\
+	sdt_might_sleep_start_timeout((x)->dmap, t);			\
 	__ret = __wait_for_completion_killable_timeout(x, t);		\
 	sdt_might_sleep_end();						\
 	__ret;								\
-- 
2.17.1


^ permalink raw reply related

* [PATCH v17 36/47] dept: assign unique dept_key to each distinct wait_for_completion() caller
From: Byungchul Park @ 2025-10-02  8:12 UTC (permalink / raw)
  To: linux-kernel
  Cc: kernel_team, torvalds, damien.lemoal, linux-ide, adilger.kernel,
	linux-ext4, mingo, peterz, will, tglx, rostedt, joel, sashal,
	daniel.vetter, duyuyang, johannes.berg, tj, tytso, willy, david,
	amir73il, gregkh, kernel-team, linux-mm, akpm, mhocko, minchan,
	hannes, vdavydov.dev, sj, jglisse, dennis, cl, penberg, rientjes,
	vbabka, ngupta, linux-block, josef, linux-fsdevel, jack, jlayton,
	dan.j.williams, hch, djwong, dri-devel, rodrigosiqueiramelo,
	melissa.srw, hamohammed.sa, harry.yoo, chris.p.wilson,
	gwan-gyeong.mun, max.byungchul.park, boqun.feng, longman,
	yunseong.kim, ysk, yeoreum.yun, netdev, matthew.brost, her0gyugyu,
	corbet, catalin.marinas, bp, dave.hansen, x86, hpa, luto,
	sumit.semwal, gustavo, christian.koenig, andi.shyti, arnd,
	lorenzo.stoakes, Liam.Howlett, rppt, surenb, mcgrof, petr.pavlu,
	da.gomez, samitolvanen, paulmck, frederic, neeraj.upadhyay,
	joelagnelf, josh, urezki, mathieu.desnoyers, jiangshanlai,
	qiang.zhang, juri.lelli, vincent.guittot, dietmar.eggemann,
	bsegall, mgorman, vschneid, chuck.lever, neil, okorniev, Dai.Ngo,
	tom, trondmy, anna, kees, bigeasy, clrkwllms, mark.rutland,
	ada.coupriediaz, kristina.martsenko, wangkefeng.wang, broonie,
	kevin.brodsky, dwmw, shakeel.butt, ast, ziy, yuzhao, baolin.wang,
	usamaarif642, joel.granados, richard.weiyang, geert+renesas,
	tim.c.chen, linux, alexander.shishkin, lillian, chenhuacai,
	francesco, guoweikang.kernel, link, jpoimboe, masahiroy, brauner,
	thomas.weissschuh, oleg, mjguzik, andrii, wangfushuai, linux-doc,
	linux-arm-kernel, linux-media, linaro-mm-sig, linux-i2c,
	linux-arch, linux-modules, rcu, linux-nfs, linux-rt-devel
In-Reply-To: <20251002081247.51255-1-byungchul@sk.com>

wait_for_completion() can be used at various points in the code and it's
very hard to distinguish wait_for_completion()s between different usages.
Using a single dept_key for all the wait_for_completion()s could trigger
false positive reports.

Assign unique dept_key to each distinct wait_for_completion() caller to
avoid false positive reports.

Signed-off-by: Byungchul Park <byungchul@sk.com>
---
 include/linux/completion.h | 100 +++++++++++++++++++++++++++++++------
 kernel/sched/completion.c  |  60 +++++++++++-----------
 2 files changed, 115 insertions(+), 45 deletions(-)

diff --git a/include/linux/completion.h b/include/linux/completion.h
index 3200b741de28..4d8fb1d95c0a 100644
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -27,12 +27,10 @@
 struct completion {
 	unsigned int done;
 	struct swait_queue_head wait;
-	struct dept_map dmap;
 };
 
 #define init_completion(x)				\
 do {							\
-	sdt_map_init(&(x)->dmap);			\
 	__init_completion(x);				\
 } while (0)
 
@@ -43,17 +41,14 @@ do {							\
 
 static inline void complete_acquire(struct completion *x, long timeout)
 {
-	sdt_might_sleep_start_timeout(&x->dmap, timeout);
 }
 
 static inline void complete_release(struct completion *x)
 {
-	sdt_might_sleep_end();
 }
 
 #define COMPLETION_INITIALIZER(work) \
-	{ 0, __SWAIT_QUEUE_HEAD_INITIALIZER((work).wait), \
-	  .dmap = DEPT_MAP_INITIALIZER(work, NULL), }
+	{ 0, __SWAIT_QUEUE_HEAD_INITIALIZER((work).wait), }
 
 #define COMPLETION_INITIALIZER_ONSTACK_MAP(work, map) \
 	(*({ init_completion_map(&(work), &(map)); &(work); }))
@@ -119,18 +114,18 @@ static inline void reinit_completion(struct completion *x)
 	x->done = 0;
 }
 
-extern void wait_for_completion(struct completion *);
-extern void wait_for_completion_io(struct completion *);
-extern int wait_for_completion_interruptible(struct completion *x);
-extern int wait_for_completion_killable(struct completion *x);
-extern int wait_for_completion_state(struct completion *x, unsigned int state);
-extern unsigned long wait_for_completion_timeout(struct completion *x,
+extern void __wait_for_completion(struct completion *);
+extern void __wait_for_completion_io(struct completion *);
+extern int __wait_for_completion_interruptible(struct completion *x);
+extern int __wait_for_completion_killable(struct completion *x);
+extern int __wait_for_completion_state(struct completion *x, unsigned int state);
+extern unsigned long __wait_for_completion_timeout(struct completion *x,
 						   unsigned long timeout);
-extern unsigned long wait_for_completion_io_timeout(struct completion *x,
+extern unsigned long __wait_for_completion_io_timeout(struct completion *x,
 						    unsigned long timeout);
-extern long wait_for_completion_interruptible_timeout(
+extern long __wait_for_completion_interruptible_timeout(
 	struct completion *x, unsigned long timeout);
-extern long wait_for_completion_killable_timeout(
+extern long __wait_for_completion_killable_timeout(
 	struct completion *x, unsigned long timeout);
 extern bool try_wait_for_completion(struct completion *x);
 extern bool completion_done(struct completion *x);
@@ -139,4 +134,79 @@ extern void complete(struct completion *);
 extern void complete_on_current_cpu(struct completion *x);
 extern void complete_all(struct completion *);
 
+#define wait_for_completion(x)						\
+({									\
+	sdt_might_sleep_start_timeout(NULL, -1L);			\
+	__wait_for_completion(x);					\
+	sdt_might_sleep_end();						\
+})
+#define wait_for_completion_io(x)					\
+({									\
+	sdt_might_sleep_start_timeout(NULL, -1L);			\
+	__wait_for_completion_io(x);					\
+	sdt_might_sleep_end();						\
+})
+#define wait_for_completion_interruptible(x)				\
+({									\
+	int __ret;							\
+									\
+	sdt_might_sleep_start_timeout(NULL, -1L);			\
+	__ret = __wait_for_completion_interruptible(x);			\
+	sdt_might_sleep_end();						\
+	__ret;								\
+})
+#define wait_for_completion_killable(x)					\
+({									\
+	int __ret;							\
+									\
+	sdt_might_sleep_start_timeout(NULL, -1L);			\
+	__ret = __wait_for_completion_killable(x);			\
+	sdt_might_sleep_end();						\
+	__ret;								\
+})
+#define wait_for_completion_state(x, s)					\
+({									\
+	int __ret;							\
+									\
+	sdt_might_sleep_start_timeout(NULL, -1L);			\
+	__ret = __wait_for_completion_state(x, s);			\
+	sdt_might_sleep_end();						\
+	__ret;								\
+})
+#define wait_for_completion_timeout(x, t)				\
+({									\
+	unsigned long __ret;						\
+									\
+	sdt_might_sleep_start_timeout(NULL, t);				\
+	__ret = __wait_for_completion_timeout(x, t);			\
+	sdt_might_sleep_end();						\
+	__ret;								\
+})
+#define wait_for_completion_io_timeout(x, t)				\
+({									\
+	unsigned long __ret;						\
+									\
+	sdt_might_sleep_start_timeout(NULL, t);				\
+	__ret = __wait_for_completion_io_timeout(x, t);			\
+	sdt_might_sleep_end();						\
+	__ret;								\
+})
+#define wait_for_completion_interruptible_timeout(x, t)			\
+({									\
+	long __ret;							\
+									\
+	sdt_might_sleep_start_timeout(NULL, t);				\
+	__ret = __wait_for_completion_interruptible_timeout(x, t);	\
+	sdt_might_sleep_end();						\
+	__ret;								\
+})
+#define wait_for_completion_killable_timeout(x, t)			\
+({									\
+	long __ret;							\
+									\
+	sdt_might_sleep_start_timeout(NULL, t);				\
+	__ret = __wait_for_completion_killable_timeout(x, t);		\
+	sdt_might_sleep_end();						\
+	__ret;								\
+})
 #endif
diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c
index 5e45a60ff7b3..7262000db114 100644
--- a/kernel/sched/completion.c
+++ b/kernel/sched/completion.c
@@ -4,7 +4,7 @@
  * Generic wait-for-completion handler;
  *
  * It differs from semaphores in that their default case is the opposite,
- * wait_for_completion default blocks whereas semaphore default non-block. The
+ * __wait_for_completion default blocks whereas semaphore default non-block. The
  * interface also makes it easy to 'complete' multiple waiting threads,
  * something which isn't entirely natural for semaphores.
  *
@@ -42,7 +42,7 @@ void complete_on_current_cpu(struct completion *x)
  * This will wake up a single thread waiting on this completion. Threads will be
  * awakened in the same order in which they were queued.
  *
- * See also complete_all(), wait_for_completion() and related routines.
+ * See also complete_all(), __wait_for_completion() and related routines.
  *
  * If this function wakes up a task, it executes a full memory barrier before
  * accessing the task state.
@@ -139,23 +139,23 @@ wait_for_common_io(struct completion *x, long timeout, int state)
 }
 
 /**
- * wait_for_completion: - waits for completion of a task
+ * __wait_for_completion: - waits for completion of a task
  * @x:  holds the state of this particular completion
  *
  * This waits to be signaled for completion of a specific task. It is NOT
  * interruptible and there is no timeout.
  *
- * See also similar routines (i.e. wait_for_completion_timeout()) with timeout
+ * See also similar routines (i.e. __wait_for_completion_timeout()) with timeout
  * and interrupt capability. Also see complete().
  */
-void __sched wait_for_completion(struct completion *x)
+void __sched __wait_for_completion(struct completion *x)
 {
 	wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
 }
-EXPORT_SYMBOL(wait_for_completion);
+EXPORT_SYMBOL(__wait_for_completion);
 
 /**
- * wait_for_completion_timeout: - waits for completion of a task (w/timeout)
+ * __wait_for_completion_timeout: - waits for completion of a task (w/timeout)
  * @x:  holds the state of this particular completion
  * @timeout:  timeout value in jiffies
  *
@@ -167,28 +167,28 @@ EXPORT_SYMBOL(wait_for_completion);
  * till timeout) if completed.
  */
 unsigned long __sched
-wait_for_completion_timeout(struct completion *x, unsigned long timeout)
+__wait_for_completion_timeout(struct completion *x, unsigned long timeout)
 {
 	return wait_for_common(x, timeout, TASK_UNINTERRUPTIBLE);
 }
-EXPORT_SYMBOL(wait_for_completion_timeout);
+EXPORT_SYMBOL(__wait_for_completion_timeout);
 
 /**
- * wait_for_completion_io: - waits for completion of a task
+ * __wait_for_completion_io: - waits for completion of a task
  * @x:  holds the state of this particular completion
  *
  * This waits to be signaled for completion of a specific task. It is NOT
  * interruptible and there is no timeout. The caller is accounted as waiting
  * for IO (which traditionally means blkio only).
  */
-void __sched wait_for_completion_io(struct completion *x)
+void __sched __wait_for_completion_io(struct completion *x)
 {
 	wait_for_common_io(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
 }
-EXPORT_SYMBOL(wait_for_completion_io);
+EXPORT_SYMBOL(__wait_for_completion_io);
 
 /**
- * wait_for_completion_io_timeout: - waits for completion of a task (w/timeout)
+ * __wait_for_completion_io_timeout: - waits for completion of a task (w/timeout)
  * @x:  holds the state of this particular completion
  * @timeout:  timeout value in jiffies
  *
@@ -201,14 +201,14 @@ EXPORT_SYMBOL(wait_for_completion_io);
  * till timeout) if completed.
  */
 unsigned long __sched
-wait_for_completion_io_timeout(struct completion *x, unsigned long timeout)
+__wait_for_completion_io_timeout(struct completion *x, unsigned long timeout)
 {
 	return wait_for_common_io(x, timeout, TASK_UNINTERRUPTIBLE);
 }
-EXPORT_SYMBOL(wait_for_completion_io_timeout);
+EXPORT_SYMBOL(__wait_for_completion_io_timeout);
 
 /**
- * wait_for_completion_interruptible: - waits for completion of a task (w/intr)
+ * __wait_for_completion_interruptible: - waits for completion of a task (w/intr)
  * @x:  holds the state of this particular completion
  *
  * This waits for completion of a specific task to be signaled. It is
@@ -216,7 +216,7 @@ EXPORT_SYMBOL(wait_for_completion_io_timeout);
  *
  * Return: -ERESTARTSYS if interrupted, 0 if completed.
  */
-int __sched wait_for_completion_interruptible(struct completion *x)
+int __sched __wait_for_completion_interruptible(struct completion *x)
 {
 	long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_INTERRUPTIBLE);
 
@@ -224,10 +224,10 @@ int __sched wait_for_completion_interruptible(struct completion *x)
 		return t;
 	return 0;
 }
-EXPORT_SYMBOL(wait_for_completion_interruptible);
+EXPORT_SYMBOL(__wait_for_completion_interruptible);
 
 /**
- * wait_for_completion_interruptible_timeout: - waits for completion (w/(to,intr))
+ * __wait_for_completion_interruptible_timeout: - waits for completion (w/(to,intr))
  * @x:  holds the state of this particular completion
  * @timeout:  timeout value in jiffies
  *
@@ -238,15 +238,15 @@ EXPORT_SYMBOL(wait_for_completion_interruptible);
  * or number of jiffies left till timeout) if completed.
  */
 long __sched
-wait_for_completion_interruptible_timeout(struct completion *x,
+__wait_for_completion_interruptible_timeout(struct completion *x,
 					  unsigned long timeout)
 {
 	return wait_for_common(x, timeout, TASK_INTERRUPTIBLE);
 }
-EXPORT_SYMBOL(wait_for_completion_interruptible_timeout);
+EXPORT_SYMBOL(__wait_for_completion_interruptible_timeout);
 
 /**
- * wait_for_completion_killable: - waits for completion of a task (killable)
+ * __wait_for_completion_killable: - waits for completion of a task (killable)
  * @x:  holds the state of this particular completion
  *
  * This waits to be signaled for completion of a specific task. It can be
@@ -254,7 +254,7 @@ EXPORT_SYMBOL(wait_for_completion_interruptible_timeout);
  *
  * Return: -ERESTARTSYS if interrupted, 0 if completed.
  */
-int __sched wait_for_completion_killable(struct completion *x)
+int __sched __wait_for_completion_killable(struct completion *x)
 {
 	long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_KILLABLE);
 
@@ -262,9 +262,9 @@ int __sched wait_for_completion_killable(struct completion *x)
 		return t;
 	return 0;
 }
-EXPORT_SYMBOL(wait_for_completion_killable);
+EXPORT_SYMBOL(__wait_for_completion_killable);
 
-int __sched wait_for_completion_state(struct completion *x, unsigned int state)
+int __sched __wait_for_completion_state(struct completion *x, unsigned int state)
 {
 	long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, state);
 
@@ -272,10 +272,10 @@ int __sched wait_for_completion_state(struct completion *x, unsigned int state)
 		return t;
 	return 0;
 }
-EXPORT_SYMBOL(wait_for_completion_state);
+EXPORT_SYMBOL(__wait_for_completion_state);
 
 /**
- * wait_for_completion_killable_timeout: - waits for completion of a task (w/(to,killable))
+ * __wait_for_completion_killable_timeout: - waits for completion of a task (w/(to,killable))
  * @x:  holds the state of this particular completion
  * @timeout:  timeout value in jiffies
  *
@@ -287,12 +287,12 @@ EXPORT_SYMBOL(wait_for_completion_state);
  * or number of jiffies left till timeout) if completed.
  */
 long __sched
-wait_for_completion_killable_timeout(struct completion *x,
+__wait_for_completion_killable_timeout(struct completion *x,
 				     unsigned long timeout)
 {
 	return wait_for_common(x, timeout, TASK_KILLABLE);
 }
-EXPORT_SYMBOL(wait_for_completion_killable_timeout);
+EXPORT_SYMBOL(__wait_for_completion_killable_timeout);
 
 /**
  *	try_wait_for_completion - try to decrement a completion without blocking
@@ -334,7 +334,7 @@ EXPORT_SYMBOL(try_wait_for_completion);
  *	completion_done - Test to see if a completion has any waiters
  *	@x:	completion structure
  *
- *	Return: 0 if there are waiters (wait_for_completion() in progress)
+ *	Return: 0 if there are waiters (__wait_for_completion() in progress)
  *		 1 if there are no waiters.
  *
  *	Note, this will always return true if complete_all() was called on @X.
-- 
2.17.1


^ permalink raw reply related

* [PATCH v17 34/47] dept: make dept stop from working on debug_locks_off()
From: Byungchul Park @ 2025-10-02  8:12 UTC (permalink / raw)
  To: linux-kernel
  Cc: kernel_team, torvalds, damien.lemoal, linux-ide, adilger.kernel,
	linux-ext4, mingo, peterz, will, tglx, rostedt, joel, sashal,
	daniel.vetter, duyuyang, johannes.berg, tj, tytso, willy, david,
	amir73il, gregkh, kernel-team, linux-mm, akpm, mhocko, minchan,
	hannes, vdavydov.dev, sj, jglisse, dennis, cl, penberg, rientjes,
	vbabka, ngupta, linux-block, josef, linux-fsdevel, jack, jlayton,
	dan.j.williams, hch, djwong, dri-devel, rodrigosiqueiramelo,
	melissa.srw, hamohammed.sa, harry.yoo, chris.p.wilson,
	gwan-gyeong.mun, max.byungchul.park, boqun.feng, longman,
	yunseong.kim, ysk, yeoreum.yun, netdev, matthew.brost, her0gyugyu,
	corbet, catalin.marinas, bp, dave.hansen, x86, hpa, luto,
	sumit.semwal, gustavo, christian.koenig, andi.shyti, arnd,
	lorenzo.stoakes, Liam.Howlett, rppt, surenb, mcgrof, petr.pavlu,
	da.gomez, samitolvanen, paulmck, frederic, neeraj.upadhyay,
	joelagnelf, josh, urezki, mathieu.desnoyers, jiangshanlai,
	qiang.zhang, juri.lelli, vincent.guittot, dietmar.eggemann,
	bsegall, mgorman, vschneid, chuck.lever, neil, okorniev, Dai.Ngo,
	tom, trondmy, anna, kees, bigeasy, clrkwllms, mark.rutland,
	ada.coupriediaz, kristina.martsenko, wangkefeng.wang, broonie,
	kevin.brodsky, dwmw, shakeel.butt, ast, ziy, yuzhao, baolin.wang,
	usamaarif642, joel.granados, richard.weiyang, geert+renesas,
	tim.c.chen, linux, alexander.shishkin, lillian, chenhuacai,
	francesco, guoweikang.kernel, link, jpoimboe, masahiroy, brauner,
	thomas.weissschuh, oleg, mjguzik, andrii, wangfushuai, linux-doc,
	linux-arm-kernel, linux-media, linaro-mm-sig, linux-i2c,
	linux-arch, linux-modules, rcu, linux-nfs, linux-rt-devel
In-Reply-To: <20251002081247.51255-1-byungchul@sk.com>

For many reasons, debug_locks_off() is called to stop lock debuging
feature e.g. on panic().  dept should also stop it in the conditions.

Signed-off-by: Byungchul Park <byungchul@sk.com>
---
 include/linux/dept.h     | 2 ++
 kernel/dependency/dept.c | 6 ++++++
 lib/debug_locks.c        | 2 ++
 3 files changed, 10 insertions(+)

diff --git a/include/linux/dept.h b/include/linux/dept.h
index 8b4d97fc4627..b164f74e86e5 100644
--- a/include/linux/dept.h
+++ b/include/linux/dept.h
@@ -390,6 +390,7 @@ struct dept_ext_wgen {
 	unsigned int wgen;
 };
 
+extern void dept_stop_emerg(void);
 extern void dept_on(void);
 extern void dept_off(void);
 extern void dept_init(void);
@@ -442,6 +443,7 @@ struct dept_ext_wgen { };
 
 #define DEPT_MAP_INITIALIZER(n, k) { }
 
+#define dept_stop_emerg()				do { } while (0)
 #define dept_on()					do { } while (0)
 #define dept_off()					do { } while (0)
 #define dept_init()					do { } while (0)
diff --git a/kernel/dependency/dept.c b/kernel/dependency/dept.c
index a0eb4b108de0..1de61306418b 100644
--- a/kernel/dependency/dept.c
+++ b/kernel/dependency/dept.c
@@ -187,6 +187,12 @@ static void dept_unlock(void)
 	arch_spin_unlock(&dept_spin);
 }
 
+void dept_stop_emerg(void)
+{
+	WRITE_ONCE(dept_stop, 1);
+}
+EXPORT_SYMBOL_GPL(dept_stop_emerg);
+
 enum bfs_ret {
 	BFS_CONTINUE,
 	BFS_DONE,
diff --git a/lib/debug_locks.c b/lib/debug_locks.c
index a75ee30b77cb..14a965914a8f 100644
--- a/lib/debug_locks.c
+++ b/lib/debug_locks.c
@@ -38,6 +38,8 @@ EXPORT_SYMBOL_GPL(debug_locks_silent);
  */
 int debug_locks_off(void)
 {
+	dept_stop_emerg();
+
 	if (debug_locks && __debug_locks_off()) {
 		if (!debug_locks_silent) {
 			console_verbose();
-- 
2.17.1


^ permalink raw reply related

* [PATCH v17 35/47] i2c: rename wait_for_completion callback to wait_for_completion_cb
From: Byungchul Park @ 2025-10-02  8:12 UTC (permalink / raw)
  To: linux-kernel
  Cc: kernel_team, torvalds, damien.lemoal, linux-ide, adilger.kernel,
	linux-ext4, mingo, peterz, will, tglx, rostedt, joel, sashal,
	daniel.vetter, duyuyang, johannes.berg, tj, tytso, willy, david,
	amir73il, gregkh, kernel-team, linux-mm, akpm, mhocko, minchan,
	hannes, vdavydov.dev, sj, jglisse, dennis, cl, penberg, rientjes,
	vbabka, ngupta, linux-block, josef, linux-fsdevel, jack, jlayton,
	dan.j.williams, hch, djwong, dri-devel, rodrigosiqueiramelo,
	melissa.srw, hamohammed.sa, harry.yoo, chris.p.wilson,
	gwan-gyeong.mun, max.byungchul.park, boqun.feng, longman,
	yunseong.kim, ysk, yeoreum.yun, netdev, matthew.brost, her0gyugyu,
	corbet, catalin.marinas, bp, dave.hansen, x86, hpa, luto,
	sumit.semwal, gustavo, christian.koenig, andi.shyti, arnd,
	lorenzo.stoakes, Liam.Howlett, rppt, surenb, mcgrof, petr.pavlu,
	da.gomez, samitolvanen, paulmck, frederic, neeraj.upadhyay,
	joelagnelf, josh, urezki, mathieu.desnoyers, jiangshanlai,
	qiang.zhang, juri.lelli, vincent.guittot, dietmar.eggemann,
	bsegall, mgorman, vschneid, chuck.lever, neil, okorniev, Dai.Ngo,
	tom, trondmy, anna, kees, bigeasy, clrkwllms, mark.rutland,
	ada.coupriediaz, kristina.martsenko, wangkefeng.wang, broonie,
	kevin.brodsky, dwmw, shakeel.butt, ast, ziy, yuzhao, baolin.wang,
	usamaarif642, joel.granados, richard.weiyang, geert+renesas,
	tim.c.chen, linux, alexander.shishkin, lillian, chenhuacai,
	francesco, guoweikang.kernel, link, jpoimboe, masahiroy, brauner,
	thomas.weissschuh, oleg, mjguzik, andrii, wangfushuai, linux-doc,
	linux-arm-kernel, linux-media, linaro-mm-sig, linux-i2c,
	linux-arch, linux-modules, rcu, linux-nfs, linux-rt-devel
In-Reply-To: <20251002081247.51255-1-byungchul@sk.com>

Functionally no change.  This patch is a preparation for DEPT(DEPendency
Tracker) to track dependencies related to a scheduler API,
wait_for_completion().

Unfortunately, struct i2c_algo_pca_data has a callback member named
wait_for_completion, that is the same as the scheduler API, which makes
it hard to change the scheduler API to a macro form because of the
ambiguity.

Add a postfix _cb to the callback member to remove the ambiguity.

Signed-off-by: Byungchul Park <byungchul@sk.com>
---
 drivers/i2c/algos/i2c-algo-pca.c      | 2 +-
 drivers/i2c/busses/i2c-pca-isa.c      | 2 +-
 drivers/i2c/busses/i2c-pca-platform.c | 2 +-
 include/linux/i2c-algo-pca.h          | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/i2c/algos/i2c-algo-pca.c b/drivers/i2c/algos/i2c-algo-pca.c
index 74b66aec33d4..ee86df4cff4b 100644
--- a/drivers/i2c/algos/i2c-algo-pca.c
+++ b/drivers/i2c/algos/i2c-algo-pca.c
@@ -30,7 +30,7 @@ static int i2c_debug;
 #define pca_clock(adap) adap->i2c_clock
 #define pca_set_con(adap, val) pca_outw(adap, I2C_PCA_CON, val)
 #define pca_get_con(adap) pca_inw(adap, I2C_PCA_CON)
-#define pca_wait(adap) adap->wait_for_completion(adap->data)
+#define pca_wait(adap) adap->wait_for_completion_cb(adap->data)
 
 static void pca_reset(struct i2c_algo_pca_data *adap)
 {
diff --git a/drivers/i2c/busses/i2c-pca-isa.c b/drivers/i2c/busses/i2c-pca-isa.c
index 85e8cf58e8bf..0cbf2f509527 100644
--- a/drivers/i2c/busses/i2c-pca-isa.c
+++ b/drivers/i2c/busses/i2c-pca-isa.c
@@ -95,7 +95,7 @@ static struct i2c_algo_pca_data pca_isa_data = {
 	/* .data intentionally left NULL, not needed with ISA */
 	.write_byte		= pca_isa_writebyte,
 	.read_byte		= pca_isa_readbyte,
-	.wait_for_completion	= pca_isa_waitforcompletion,
+	.wait_for_completion_cb	= pca_isa_waitforcompletion,
 	.reset_chip		= pca_isa_resetchip,
 };
 
diff --git a/drivers/i2c/busses/i2c-pca-platform.c b/drivers/i2c/busses/i2c-pca-platform.c
index 87da8241b927..c0f35ebbe37d 100644
--- a/drivers/i2c/busses/i2c-pca-platform.c
+++ b/drivers/i2c/busses/i2c-pca-platform.c
@@ -180,7 +180,7 @@ static int i2c_pca_pf_probe(struct platform_device *pdev)
 	}
 
 	i2c->algo_data.data = i2c;
-	i2c->algo_data.wait_for_completion = i2c_pca_pf_waitforcompletion;
+	i2c->algo_data.wait_for_completion_cb = i2c_pca_pf_waitforcompletion;
 	if (i2c->gpio)
 		i2c->algo_data.reset_chip = i2c_pca_pf_resetchip;
 	else
diff --git a/include/linux/i2c-algo-pca.h b/include/linux/i2c-algo-pca.h
index 7c522fdd9ea7..e305bf32e40a 100644
--- a/include/linux/i2c-algo-pca.h
+++ b/include/linux/i2c-algo-pca.h
@@ -71,7 +71,7 @@ struct i2c_algo_pca_data {
 	void 				*data;	/* private low level data */
 	void (*write_byte)		(void *data, int reg, int val);
 	int  (*read_byte)		(void *data, int reg);
-	int  (*wait_for_completion)	(void *data);
+	int  (*wait_for_completion_cb)	(void *data);
 	void (*reset_chip)		(void *data);
 	/* For PCA9564, use one of the predefined frequencies:
 	 * 330000, 288000, 217000, 146000, 88000, 59000, 44000, 36000
-- 
2.17.1


^ permalink raw reply related

* [PATCH v17 28/47] dept: add documentation for dept
From: Byungchul Park @ 2025-10-02  8:12 UTC (permalink / raw)
  To: linux-kernel
  Cc: kernel_team, torvalds, damien.lemoal, linux-ide, adilger.kernel,
	linux-ext4, mingo, peterz, will, tglx, rostedt, joel, sashal,
	daniel.vetter, duyuyang, johannes.berg, tj, tytso, willy, david,
	amir73il, gregkh, kernel-team, linux-mm, akpm, mhocko, minchan,
	hannes, vdavydov.dev, sj, jglisse, dennis, cl, penberg, rientjes,
	vbabka, ngupta, linux-block, josef, linux-fsdevel, jack, jlayton,
	dan.j.williams, hch, djwong, dri-devel, rodrigosiqueiramelo,
	melissa.srw, hamohammed.sa, harry.yoo, chris.p.wilson,
	gwan-gyeong.mun, max.byungchul.park, boqun.feng, longman,
	yunseong.kim, ysk, yeoreum.yun, netdev, matthew.brost, her0gyugyu,
	corbet, catalin.marinas, bp, dave.hansen, x86, hpa, luto,
	sumit.semwal, gustavo, christian.koenig, andi.shyti, arnd,
	lorenzo.stoakes, Liam.Howlett, rppt, surenb, mcgrof, petr.pavlu,
	da.gomez, samitolvanen, paulmck, frederic, neeraj.upadhyay,
	joelagnelf, josh, urezki, mathieu.desnoyers, jiangshanlai,
	qiang.zhang, juri.lelli, vincent.guittot, dietmar.eggemann,
	bsegall, mgorman, vschneid, chuck.lever, neil, okorniev, Dai.Ngo,
	tom, trondmy, anna, kees, bigeasy, clrkwllms, mark.rutland,
	ada.coupriediaz, kristina.martsenko, wangkefeng.wang, broonie,
	kevin.brodsky, dwmw, shakeel.butt, ast, ziy, yuzhao, baolin.wang,
	usamaarif642, joel.granados, richard.weiyang, geert+renesas,
	tim.c.chen, linux, alexander.shishkin, lillian, chenhuacai,
	francesco, guoweikang.kernel, link, jpoimboe, masahiroy, brauner,
	thomas.weissschuh, oleg, mjguzik, andrii, wangfushuai, linux-doc,
	linux-arm-kernel, linux-media, linaro-mm-sig, linux-i2c,
	linux-arch, linux-modules, rcu, linux-nfs, linux-rt-devel
In-Reply-To: <20251002081247.51255-1-byungchul@sk.com>

This document describes the concept and APIs of dept.

Signed-off-by: Byungchul Park <byungchul@sk.com>
---
 Documentation/dependency/dept.txt     | 735 ++++++++++++++++++++++++++
 Documentation/dependency/dept_api.txt | 117 ++++
 2 files changed, 852 insertions(+)
 create mode 100644 Documentation/dependency/dept.txt
 create mode 100644 Documentation/dependency/dept_api.txt

diff --git a/Documentation/dependency/dept.txt b/Documentation/dependency/dept.txt
new file mode 100644
index 000000000000..5dd358b96734
--- /dev/null
+++ b/Documentation/dependency/dept.txt
@@ -0,0 +1,735 @@
+DEPT(DEPendency Tracker)
+========================
+
+Started by Byungchul Park <max.byungchul.park@sk.com>
+
+How lockdep works
+-----------------
+
+Lockdep detects a deadlock by checking lock acquisition order. For
+example, a graph to track acquisition order built by lockdep might look
+like:
+
+   A -> B -
+           \
+            -> E
+           /
+   C -> D -
+
+   where 'A -> B' means that acquisition A is prior to acquisition B
+   with A still held.
+
+Lockdep keeps adding each new acquisition order into the graph in
+runtime. For example, 'E -> C' will be added when the two locks have
+been acquired in the order, E and then C. The graph will look like:
+
+       A -> B -
+               \
+                -> E -
+               /      \
+    -> C -> D -        \
+   /                   /
+   \                  /
+    ------------------
+
+   where 'A -> B' means that acquisition A is prior to acquisition B
+   with A still held.
+
+This graph contains a subgraph that demonstrates a loop like:
+
+                -> E -
+               /      \
+    -> C -> D -        \
+   /                   /
+   \                  /
+    ------------------
+
+   where 'A -> B' means that acquisition A is prior to acquisition B
+   with A still held.
+
+Lockdep reports it as a deadlock on detection of a loop and stops its
+working.
+
+CONCLUSION
+
+Lockdep detects a deadlock by checking if a loop has been created after
+adding a new acquisition order into the graph.
+
+
+Limitation of lockdep
+---------------------
+
+Lockdep deals with a deadlock by typical lock e.g. spinlock and mutex,
+that are supposed to be released within the acquisition context. However,
+when it comes to a deadlock by folio lock that is not supposed to be
+released within the acquisition context or other general synchronization
+mechanisms, lockdep doesn't work.
+
+Can lockdep detect the following deadlock?
+
+   context X	   context Y	   context Z
+
+		   mutex_lock A
+   folio_lock B
+		   folio_lock B <- DEADLOCK
+				   mutex_lock A <- DEADLOCK
+				   folio_unlock B
+		   folio_unlock B
+		   mutex_unlock A
+				   mutex_unlock A
+
+No. What about the following?
+
+   context X		   context Y
+
+			   mutex_lock A
+   mutex_lock A <- DEADLOCK
+			   wait_for_complete B <- DEADLOCK
+   complete B
+			   mutex_unlock A
+   mutex_unlock A
+
+No.
+
+CONCLUSION
+
+Lockdep cannot detect a deadlock by folio lock or other general
+synchronization mechanisms.
+
+
+What leads a deadlock
+---------------------
+
+A deadlock occurs when one or multi contexts are waiting for events that
+will never happen. For example:
+
+   context X	   context Y	   context Z
+
+   |		   |		   |
+   v		   |		   |
+   1 wait for A    v		   |
+   .		   2 wait for C    v
+   event C	   .		   3 wait for B
+		   event B	   .
+				   event A
+
+Event C cannot be triggered because context X is stuck at 1, event B
+cannot be triggered because context Y is stuck at 2, and event A cannot
+be triggered because context Z is stuck at 3. All the contexts are stuck.
+We call this *deadlock*.
+
+If an event occurrence is a prerequisite to reaching another event, we
+call it *dependency*. In this example:
+
+   Event A occurrence is a prerequisite to reaching event C.
+   Event C occurrence is a prerequisite to reaching event B.
+   Event B occurrence is a prerequisite to reaching event A.
+
+In terms of dependency:
+
+   Event C depends on event A.
+   Event B depends on event C.
+   Event A depends on event B.
+
+Dependency graph reflecting this example will look like:
+
+    -> C -> A -> B -
+   /                \
+   \                /
+    ----------------
+
+   where 'A -> B' means that event A depends on event B.
+
+A circular dependency exists. Such a circular dependency leads a
+deadlock since no waiters can have desired events triggered.
+
+CONCLUSION
+
+A circular dependency of events leads a deadlock.
+
+
+Introduce DEPT
+--------------
+
+DEPT(DEPendency Tracker) tracks wait and event instead of lock
+acquisition order so as to recognize the following situation:
+
+   context X	   context Y	   context Z
+
+   |		   |		   |
+   v		   |		   |
+   wait for A	   v		   |
+   .		   wait for C	   v
+   event C	   .		   wait for B
+		   event B	   .
+				   event A
+
+and builds up a dependency graph in runtime that is similar to lockdep.
+The graph might look like:
+
+    -> C -> A -> B -
+   /                \
+   \                /
+    ----------------
+
+   where 'A -> B' means that event A depends on event B.
+
+DEPT keeps adding each new dependency into the graph in runtime. For
+example, 'B -> D' will be added when event D occurrence is a
+prerequisite to reaching event B like:
+
+   |
+   v
+   wait for D
+   .
+   event B
+
+After the addition, the graph will look like:
+
+                     -> D
+                    /
+    -> C -> A -> B -
+   /                \
+   \                /
+    ----------------
+
+   where 'A -> B' means that event A depends on event B.
+
+DEPT is going to report a deadlock on detection of a new loop.
+
+CONCLUSION
+
+DEPT works on wait and event so as to theoretically detect all the
+potential deadlocks.
+
+
+How DEPT works
+--------------
+
+Let's take a look how DEPT works with the 1st example in the section
+'Limitation of lockdep'.
+
+   context X	   context Y	   context Z
+
+		   mutex_lock A
+   folio_lock B
+		   folio_lock B <- DEADLOCK
+				   mutex_lock A <- DEADLOCK
+				   folio_unlock B
+		   folio_unlock B
+		   mutex_unlock A
+				   mutex_unlock A
+
+Adding comments to describe DEPT's view in terms of wait and event:
+
+   context X	   context Y	   context Z
+
+		   mutex_lock A
+		   /* wait for A */
+   folio_lock B
+   /* wait for A */
+   /* start event A context */
+
+		   folio_lock B
+		   /* wait for B */ <- DEADLOCK
+		   /* start event B context */
+
+				   mutex_lock A
+				   /* wait for A */ <- DEADLOCK
+				   /* start event A context */
+
+				   folio_unlock B
+				   /* event B */
+		   folio_unlock B
+		   /* event B */
+
+		   mutex_unlock A
+		   /* event A */
+				   mutex_unlock A
+				   /* event A */
+
+Adding more supplementary comments to describe DEPT's view in detail:
+
+   context X	   context Y	   context Z
+
+		   mutex_lock A
+		   /* might wait for A */
+		   /* start to take into account event A's context */
+		   /* 1 */
+   folio_lock B
+   /* might wait for B */
+   /* start to take into account event B's context */
+   /* 2 */
+
+		   folio_lock B
+		   /* might wait for B */ <- DEADLOCK
+		   /* start to take into account event B's context */
+		   /* 3 */
+
+				   mutex_lock A
+				   /* might wait for A */ <- DEADLOCK
+				   /* start to take into account
+				      event A's context */
+				   /* 4 */
+
+				   folio_unlock B
+				   /* event B that's been valid since 2 */
+		   folio_unlock B
+		   /* event B that's been valid since 3 */
+
+		   mutex_unlock A
+		   /* event A that's been valid since 1 */
+
+				   mutex_unlock A
+				   /* event A that's been valid since 4 */
+
+Let's build up dependency graph with this example. Firstly, context X:
+
+   context X
+
+   folio_lock B
+   /* might wait for B */
+   /* start to take into account event B's context */
+   /* 2 */
+
+There are no events to create dependency. Next, context Y:
+
+   context Y
+
+   mutex_lock A
+   /* might wait for A */
+   /* start to take into account event A's context */
+   /* 1 */
+
+   folio_lock B
+   /* might wait for B */
+   /* start to take into account event B's context */
+   /* 3 */
+
+   folio_unlock B
+   /* event B that's been valid since 3 */
+
+   mutex_unlock A
+   /* event A that's been valid since 1 */
+
+There are two events. For event B, folio_unlock B, since there are no
+waits between 3 and the event, event B does not create dependency. For
+event A, there is a wait, folio_lock B, between 1 and the event. Which
+means event A cannot be triggered if event B does not wake up the wait.
+Therefore, we can say event A depends on event B, say, 'A -> B'. The
+graph will look like after adding the dependency:
+
+   A -> B
+
+   where 'A -> B' means that event A depends on event B.
+
+Lastly, context Z:
+
+   context Z
+
+   mutex_lock A
+   /* might wait for A */
+   /* start to take into account event A's context */
+   /* 4 */
+
+   folio_unlock B
+   /* event B that's been valid since 2 */
+
+   mutex_unlock A
+   /* event A that's been valid since 4 */
+
+There are also two events. For event B, folio_unlock B, there is a
+wait, mutex_lock A, between 2 and the event - remind 2 is at a very
+start and before the wait in timeline. Which means event B cannot be
+triggered if event A does not wake up the wait. Therefore, we can say
+event B depends on event A, say, 'B -> A'. The graph will look like
+after adding the dependency:
+
+    -> A -> B -
+   /           \
+   \           /
+    -----------
+
+   where 'A -> B' means that event A depends on event B.
+
+A new loop has been created. So DEPT can report it as a deadlock. For
+event A, mutex_unlock A, since there are no waits between 4 and the
+event, event A does not create dependency. That's it.
+
+CONCLUSION
+
+DEPT works well with any general synchronization mechanisms by focusing
+on wait, event and its context.
+
+
+Interpret DEPT report
+---------------------
+
+The following is the example in the section 'How DEPT works'.
+
+   context X	   context Y	   context Z
+
+		   mutex_lock A
+		   /* might wait for A */
+		   /* start to take into account event A's context */
+		   /* 1 */
+   folio_lock B
+   /* might wait for B */
+   /* start to take into account event B's context */
+   /* 2 */
+
+		   folio_lock B
+		   /* might wait for B */ <- DEADLOCK
+		   /* start to take into account event B's context */
+		   /* 3 */
+
+				   mutex_lock A
+				   /* might wait for A */ <- DEADLOCK
+				   /* start to take into account
+				      event A's context */
+				   /* 4 */
+
+				   folio_unlock B
+				   /* event B that's been valid since 2 */
+		   folio_unlock B
+		   /* event B that's been valid since 3 */
+
+		   mutex_unlock A
+		   /* event A that's been valid since 1 */
+
+				   mutex_unlock A
+				   /* event A that's been valid since 4 */
+
+We can Simplify this by replacing each waiting point with [W], each
+point where its event's context starts with [S] and each event with [E].
+This example will look like after the replacement:
+
+   context X	   context Y	   context Z
+
+		   [W][S] mutex_lock A
+   [W][S] folio_lock B
+		   [W][S] folio_lock B <- DEADLOCK
+
+				   [W][S] mutex_lock A <- DEADLOCK
+				   [E] folio_unlock B
+		   [E] folio_unlock B
+		   [E] mutex_unlock A
+				   [E] mutex_unlock A
+
+DEPT uses the symbols [W], [S] and [E] in its report as described above.
+The following is an example reported by DEPT for a real problem.
+
+   Link: https://lore.kernel.org/lkml/6383cde5-cf4b-facf-6e07-1378a485657d@I-love.SAKURA.ne.jp/#t
+   Link: https://lore.kernel.org/lkml/1674268856-31807-1-git-send-email-byungchul.park@lge.com/
+
+   ===================================================
+   DEPT: Circular dependency has been detected.
+   6.2.0-rc1-00025-gb0c20ebf51ac-dirty #28 Not tainted
+   ---------------------------------------------------
+   summary
+   ---------------------------------------------------
+   *** DEADLOCK ***
+
+   context A
+       [S] lock(&ni->ni_lock:0)
+       [W] folio_wait_bit_common(PG_locked_map:0)
+       [E] unlock(&ni->ni_lock:0)
+
+   context B
+       [S] (unknown)(PG_locked_map:0)
+       [W] lock(&ni->ni_lock:0)
+       [E] folio_unlock(PG_locked_map:0)
+
+   [S]: start of the event context
+   [W]: the wait blocked
+   [E]: the event not reachable
+   ---------------------------------------------------
+   context A's detail
+   ---------------------------------------------------
+   context A
+       [S] lock(&ni->ni_lock:0)
+       [W] folio_wait_bit_common(PG_locked_map:0)
+       [E] unlock(&ni->ni_lock:0)
+
+   [S] lock(&ni->ni_lock:0):
+   [<ffffffff82b396fb>] ntfs3_setattr+0x54b/0xd40
+   stacktrace:
+         ntfs3_setattr+0x54b/0xd40
+         notify_change+0xcb3/0x1430
+         do_truncate+0x149/0x210
+         path_openat+0x21a3/0x2a90
+         do_filp_open+0x1ba/0x410
+         do_sys_openat2+0x16d/0x4e0
+         __x64_sys_creat+0xcd/0x120
+         do_syscall_64+0x41/0xc0
+         entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+   [W] folio_wait_bit_common(PG_locked_map:0):
+   [<ffffffff81b228b0>] truncate_inode_pages_range+0x9b0/0xf20
+   stacktrace:
+         folio_wait_bit_common+0x5e0/0xaf0
+         truncate_inode_pages_range+0x9b0/0xf20
+         truncate_pagecache+0x67/0x90
+         ntfs3_setattr+0x55a/0xd40
+         notify_change+0xcb3/0x1430
+         do_truncate+0x149/0x210
+         path_openat+0x21a3/0x2a90
+         do_filp_open+0x1ba/0x410
+         do_sys_openat2+0x16d/0x4e0
+         __x64_sys_creat+0xcd/0x120
+         do_syscall_64+0x41/0xc0
+         entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+   [E] unlock(&ni->ni_lock:0):
+   (N/A)
+   ---------------------------------------------------
+   context B's detail
+   ---------------------------------------------------
+   context B
+       [S] (unknown)(PG_locked_map:0)
+       [W] lock(&ni->ni_lock:0)
+       [E] folio_unlock(PG_locked_map:0)
+
+   [S] (unknown)(PG_locked_map:0):
+   (N/A)
+
+   [W] lock(&ni->ni_lock:0):
+   [<ffffffff82b009ec>] attr_data_get_block+0x32c/0x19f0
+   stacktrace:
+         attr_data_get_block+0x32c/0x19f0
+         ntfs_get_block_vbo+0x264/0x1330
+         __block_write_begin_int+0x3bd/0x14b0
+         block_write_begin+0xb9/0x4d0
+         ntfs_write_begin+0x27e/0x480
+         generic_perform_write+0x256/0x570
+         __generic_file_write_iter+0x2ae/0x500
+         ntfs_file_write_iter+0x66d/0x1d70
+         do_iter_readv_writev+0x20b/0x3c0
+         do_iter_write+0x188/0x710
+         vfs_iter_write+0x74/0xa0
+         iter_file_splice_write+0x745/0xc90
+         direct_splice_actor+0x114/0x180
+         splice_direct_to_actor+0x33b/0x8b0
+         do_splice_direct+0x1b7/0x280
+         do_sendfile+0xb49/0x1310
+
+   [E] folio_unlock(PG_locked_map:0):
+   [<ffffffff81f10222>] generic_write_end+0xf2/0x440
+   stacktrace:
+         generic_write_end+0xf2/0x440
+         ntfs_write_end+0x42e/0x980
+         generic_perform_write+0x316/0x570
+         __generic_file_write_iter+0x2ae/0x500
+         ntfs_file_write_iter+0x66d/0x1d70
+         do_iter_readv_writev+0x20b/0x3c0
+         do_iter_write+0x188/0x710
+         vfs_iter_write+0x74/0xa0
+         iter_file_splice_write+0x745/0xc90
+         direct_splice_actor+0x114/0x180
+         splice_direct_to_actor+0x33b/0x8b0
+         do_splice_direct+0x1b7/0x280
+         do_sendfile+0xb49/0x1310
+         __x64_sys_sendfile64+0x1d0/0x210
+         do_syscall_64+0x41/0xc0
+         entry_SYSCALL_64_after_hwframe+0x63/0xcd
+   ---------------------------------------------------
+   information that might be helpful
+   ---------------------------------------------------
+   CPU: 1 PID: 8060 Comm: a.out Not tainted
+	6.2.0-rc1-00025-gb0c20ebf51ac-dirty #28
+   Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
+	BIOS Bochs 01/01/2011
+   Call Trace:
+    <TASK>
+    dump_stack_lvl+0xf2/0x169
+    print_circle.cold+0xca4/0xd28
+    ? lookup_dep+0x240/0x240
+    ? extend_queue+0x223/0x300
+    cb_check_dl+0x1e7/0x260
+    bfs+0x27b/0x610
+    ? print_circle+0x240/0x240
+    ? llist_add_batch+0x180/0x180
+    ? extend_queue_rev+0x300/0x300
+    ? __add_dep+0x60f/0x810
+    add_dep+0x221/0x5b0
+    ? __add_idep+0x310/0x310
+    ? add_iecxt+0x1bc/0xa60
+    ? add_iecxt+0x1bc/0xa60
+    ? add_iecxt+0x1bc/0xa60
+    ? add_iecxt+0x1bc/0xa60
+    __dept_wait+0x600/0x1490
+    ? add_iecxt+0x1bc/0xa60
+    ? truncate_inode_pages_range+0x9b0/0xf20
+    ? check_new_class+0x790/0x790
+    ? dept_enirq_transition+0x519/0x9c0
+    dept_wait+0x159/0x3b0
+    ? truncate_inode_pages_range+0x9b0/0xf20
+    folio_wait_bit_common+0x5e0/0xaf0
+    ? filemap_get_folios_contig+0xa30/0xa30
+    ? dept_enirq_transition+0x519/0x9c0
+    ? lock_is_held_type+0x10e/0x160
+    ? lock_is_held_type+0x11e/0x160
+    truncate_inode_pages_range+0x9b0/0xf20
+    ? truncate_inode_partial_folio+0xba0/0xba0
+    ? setattr_prepare+0x142/0xc40
+    truncate_pagecache+0x67/0x90
+    ntfs3_setattr+0x55a/0xd40
+    ? ktime_get_coarse_real_ts64+0x1e5/0x2f0
+    ? ntfs_extend+0x5c0/0x5c0
+    ? mode_strip_sgid+0x210/0x210
+    ? ntfs_extend+0x5c0/0x5c0
+    notify_change+0xcb3/0x1430
+    ? do_truncate+0x149/0x210
+    do_truncate+0x149/0x210
+    ? file_open_root+0x430/0x430
+    ? process_measurement+0x18c0/0x18c0
+    ? ntfs_file_release+0x230/0x230
+    path_openat+0x21a3/0x2a90
+    ? path_lookupat+0x840/0x840
+    ? dept_enirq_transition+0x519/0x9c0
+    ? lock_is_held_type+0x10e/0x160
+    do_filp_open+0x1ba/0x410
+    ? may_open_dev+0xf0/0xf0
+    ? find_held_lock+0x2d/0x110
+    ? lock_release+0x43c/0x830
+    ? dept_ecxt_exit+0x31a/0x590
+    ? _raw_spin_unlock+0x3b/0x50
+    ? alloc_fd+0x2de/0x6e0
+    do_sys_openat2+0x16d/0x4e0
+    ? __ia32_sys_get_robust_list+0x3b0/0x3b0
+    ? build_open_flags+0x6f0/0x6f0
+    ? dept_enirq_transition+0x519/0x9c0
+    ? dept_enirq_transition+0x519/0x9c0
+    ? lock_is_held_type+0x4e/0x160
+    ? lock_is_held_type+0x4e/0x160
+    __x64_sys_creat+0xcd/0x120
+    ? __x64_compat_sys_openat+0x1f0/0x1f0
+    do_syscall_64+0x41/0xc0
+    entry_SYSCALL_64_after_hwframe+0x63/0xcd
+   RIP: 0033:0x7f8b9e4e4469
+   Code: 00 f3 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48
+   89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48>
+   3d 01 f0 ff ff 73 01 c3 48 8b 0d ff 49 2b 00 f7 d8 64 89 01 48
+   RSP: 002b:00007f8b9eea4ef8 EFLAGS: 00000202 ORIG_RAX: 0000000000000055
+   RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f8b9e4e4469
+   RDX: 0000000000737562 RSI: 0000000000000000 RDI: 0000000020000000
+   RBP: 00007f8b9eea4f20 R08: 0000000000000000 R09: 0000000000000000
+   R10: 0000000000000000 R11: 0000000000000202 R12: 00007fffa75511ee
+   R13: 00007fffa75511ef R14: 00007f8b9ee85000 R15: 0000000000000003
+    </TASK>
+
+Let's take a look at the summary that is the most important part.
+
+   ---------------------------------------------------
+   summary
+   ---------------------------------------------------
+   *** DEADLOCK ***
+
+   context A
+       [S] lock(&ni->ni_lock:0)
+       [W] folio_wait_bit_common(PG_locked_map:0)
+       [E] unlock(&ni->ni_lock:0)
+
+   context B
+       [S] (unknown)(PG_locked_map:0)
+       [W] lock(&ni->ni_lock:0)
+       [E] folio_unlock(PG_locked_map:0)
+
+   [S]: start of the event context
+   [W]: the wait blocked
+   [E]: the event not reachable
+
+The summary shows the following scenario:
+
+   context A	   context B	   context ?(unknown)
+
+				   [S] folio_lock(&f1)
+   [S] lock(&ni->ni_lock:0)
+   [W] folio_wait_bit_common(PG_locked_map:0)
+
+		   [W] lock(&ni->ni_lock:0)
+		   [E] folio_unlock(&f1)
+
+   [E] unlock(&ni->ni_lock:0)
+
+Adding supplementary comments to describe DEPT's view in detail:
+
+   context A	   context B	   context ?(unknown)
+
+				   [S] folio_lock(&f1)
+				   /* start to take into account context
+				      B heading for folio_unlock(&f1) */
+				   /* 1 */
+   [S] lock(&ni->ni_lock:0)
+   /* start to take into account this context heading for
+      unlock(&ni->ni_lock:0) */
+   /* 2 */
+
+   [W] folio_wait_bit_common(PG_locked_map:0) (= folio_lock(&f1))
+   /* might wait for folio_unlock(&f1) */
+
+		   [W] lock(&ni->ni_lock:0)
+		   /* might wait for unlock(&ni->ni_lock:0) */
+
+		   [E] folio_unlock(&f1)
+		   /* event that's been valid since 1 */
+
+   [E] unlock(&ni->ni_lock:0)
+   /* event that's been valid since 2 */
+
+Let's build up dependency graph with this report. Firstly, context A:
+
+   context A
+
+   [S] lock(&ni->ni_lock:0)
+   /* start to take into account this context heading for
+      unlock(&ni->ni_lock:0) */
+   /* 2 */
+
+   [W] folio_wait_bit_common(PG_locked_map:0) (= folio_lock(&f1))
+   /* might wait for folio_unlock(&f1) */
+
+   [E] unlock(&ni->ni_lock:0)
+   /* event that's been valid since 2 */
+
+There is one interesting event, unlock(&ni->ni_lock:0). There is a
+wait, folio_lock(&f1), between 2 and the event. Which means
+unlock(&ni->ni_lock:0) is not reachable if folio_unlock(&f1) does not
+wake up the wait. Therefore, we can say unlock(&ni->ni_lock:0) depends
+on folio_unlock(&f1), say, 'unlock(&ni->ni_lock:0) -> folio_unlock(&f1)'.
+The graph will look like after adding the dependency:
+
+   unlock(&ni->ni_lock:0) -> folio_unlock(&f1)
+
+   where 'A -> B' means that event A depends on event B.
+
+Secondly, context B:
+
+   context B
+
+   [W] lock(&ni->ni_lock:0)
+   /* might wait for unlock(&ni->ni_lock:0) */
+
+   [E] folio_unlock(&f1)
+   /* event that's been valid since 1 */
+
+There is also one interesting event, folio_unlock(&f1). There is a
+wait, lock(&ni->ni_lock:0), between 1 and the event - remind 1 is at a
+very start and before the wait in timeline. Which means folio_unlock(&f1)
+is not reachable if unlock(&ni->ni_lock:0) does not wake up the wait.
+Therefore, we can say folio_unlock(&f1) depends on unlock(&ni->ni_lock:0),
+say, 'folio_unlock(&f1) -> unlock(&ni->ni_lock:0)'. The graph will look
+like after adding the dependency:
+
+    -> unlock(&ni->ni_lock:0) -> folio_unlock(&f1) -
+   /                                                \
+   \                                                /
+    ------------------------------------------------
+
+   where 'A -> B' means that event A depends on event B.
+
+A new loop has been created. So DEPT can report it as a deadlock! Cool!
+
+CONCLUSION
+
+DEPT works awesome!
diff --git a/Documentation/dependency/dept_api.txt b/Documentation/dependency/dept_api.txt
new file mode 100644
index 000000000000..8e0d5a118a46
--- /dev/null
+++ b/Documentation/dependency/dept_api.txt
@@ -0,0 +1,117 @@
+DEPT(DEPendency Tracker) APIs
+=============================
+
+Started by Byungchul Park <max.byungchul.park@sk.com>
+
+SDT(Single-event Dependency Tracker) APIs
+-----------------------------------------
+Use these APIs to annotate on either wait or event.  These have been
+already applied into the existing synchronization primitives e.g.
+waitqueue, swait, wait_for_completion(), dma fence and so on.  The basic
+APIs of SDT are:
+
+   /*
+    * After defining 'struct dept_map map', initialize the instance.
+    */
+   sdt_map_init(map);
+
+   /*
+    * Place just before the interesting wait.
+    */
+   sdt_wait(map);
+
+   /*
+    * Place just before the interesting event.
+    */
+   sdt_event(map);
+
+The advanced APIs of SDT are:
+
+   /*
+    * After defining 'struct dept_map map', initialize the instance
+    * using an external key.
+    */
+   sdt_map_init_key(map, key);
+
+   /*
+    * Place just before the interesting timeout wait.
+    */
+   sdt_wait_timeout(map, time);
+
+   /*
+    * Use sdt_might_sleep_start() and sdt_might_sleep_end() in pair.
+    * Place at the start of the interesting section that might enter
+    * schedule() or its family that needs to be woken up by
+    * try_to_wake_up().
+    */
+   sdt_might_sleep_start(map);
+
+   /*
+    * Use sdt_might_sleep_start_timeout() and sdt_might_sleep_end() in
+    * pair.  Place at the start of the interesting section that might
+    * enter schedule_timeout() or its family that needs to be woken up
+    * by try_to_wake_up().
+    */
+   sdt_might_sleep_start_timeout(map, time);
+
+   /*
+    * Use sdt_might_sleep_start() and sdt_might_sleep_end() in pair.
+    * Place at the end of the interesting section that might enter
+    * schedule(), schedule_timeout() or its family that needs to be
+    * woken up by try_to_wake_up().
+    */
+   sdt_might_sleep_end();
+
+   /*
+    * Use sdt_ecxt_enter() and sdt_ecxt_exit() in pair.  Place at the
+    * start of the interesting section where the interesting event might
+    * be triggered.
+    */
+   sdt_ecxt_enter(map);
+
+   /*
+    * Use sdt_ecxt_enter() and sdt_ecxt_exit() in pair.  Place at the
+    * end of the interesting section where the interesting event might
+    * be triggered.
+    */
+   sdt_ecxt_exit(map);
+
+
+LDT(Lock Dependency Tracker) APIs
+---------------------------------
+Do not use these APIs directly.  These are the wrappers for typical
+locks, that have been already applied into major locks internally e.g.
+spin lock, mutex, rwlock and so on.  The APIs of LDT are:
+
+   ldt_init(map, key, sub, name);
+   ldt_lock(map, sub_local, try, nest, ip);
+   ldt_rlock(map, sub_local, try, nest, ip, queued);
+   ldt_wlock(map, sub_local, try, nest, ip);
+   ldt_unlock(map, ip);
+   ldt_downgrade(map, ip);
+   ldt_set_class(map, name, key, sub_local, ip);
+
+
+Raw APIs
+--------
+Do not use these APIs directly.  The raw APIs of dept are:
+
+   dept_free_range(start, size);
+   dept_map_init(map, key, sub, name);
+   dept_map_reinit(map, key, sub, name);
+   dept_ext_wgen_init(ext_wgen);
+   dept_map_copy(map_to, map_from);
+   dept_wait(map, wait_flags, ip, wait_func, sub_local, time);
+   dept_stage_wait(map, key, ip, wait_func, time);
+   dept_request_event_wait_commit();
+   dept_clean_stage();
+   dept_stage_event(task, ip);
+   dept_ecxt_enter(map, evt_flags, ip, ecxt_func, evt_func, sub_local);
+   dept_ecxt_holding(map, evt_flags);
+   dept_request_event(map, ext_wgen);
+   dept_event(map, evt_flags, ip, evt_func, ext_wgen);
+   dept_ecxt_exit(map, evt_flags, ip);
+   dept_ecxt_enter_nokeep(map);
+   dept_key_init(key);
+   dept_key_destroy(key);
+   dept_map_ecxt_modify(map, cur_evt_flags, key, evt_flags, ip, ecxt_func, evt_func, sub_local);
-- 
2.17.1


^ permalink raw reply related

* [PATCH v17 31/47] dept: assign dept map to mmu notifier invalidation synchronization
From: Byungchul Park @ 2025-10-02  8:12 UTC (permalink / raw)
  To: linux-kernel
  Cc: kernel_team, torvalds, damien.lemoal, linux-ide, adilger.kernel,
	linux-ext4, mingo, peterz, will, tglx, rostedt, joel, sashal,
	daniel.vetter, duyuyang, johannes.berg, tj, tytso, willy, david,
	amir73il, gregkh, kernel-team, linux-mm, akpm, mhocko, minchan,
	hannes, vdavydov.dev, sj, jglisse, dennis, cl, penberg, rientjes,
	vbabka, ngupta, linux-block, josef, linux-fsdevel, jack, jlayton,
	dan.j.williams, hch, djwong, dri-devel, rodrigosiqueiramelo,
	melissa.srw, hamohammed.sa, harry.yoo, chris.p.wilson,
	gwan-gyeong.mun, max.byungchul.park, boqun.feng, longman,
	yunseong.kim, ysk, yeoreum.yun, netdev, matthew.brost, her0gyugyu,
	corbet, catalin.marinas, bp, dave.hansen, x86, hpa, luto,
	sumit.semwal, gustavo, christian.koenig, andi.shyti, arnd,
	lorenzo.stoakes, Liam.Howlett, rppt, surenb, mcgrof, petr.pavlu,
	da.gomez, samitolvanen, paulmck, frederic, neeraj.upadhyay,
	joelagnelf, josh, urezki, mathieu.desnoyers, jiangshanlai,
	qiang.zhang, juri.lelli, vincent.guittot, dietmar.eggemann,
	bsegall, mgorman, vschneid, chuck.lever, neil, okorniev, Dai.Ngo,
	tom, trondmy, anna, kees, bigeasy, clrkwllms, mark.rutland,
	ada.coupriediaz, kristina.martsenko, wangkefeng.wang, broonie,
	kevin.brodsky, dwmw, shakeel.butt, ast, ziy, yuzhao, baolin.wang,
	usamaarif642, joel.granados, richard.weiyang, geert+renesas,
	tim.c.chen, linux, alexander.shishkin, lillian, chenhuacai,
	francesco, guoweikang.kernel, link, jpoimboe, masahiroy, brauner,
	thomas.weissschuh, oleg, mjguzik, andrii, wangfushuai, linux-doc,
	linux-arm-kernel, linux-media, linaro-mm-sig, linux-i2c,
	linux-arch, linux-modules, rcu, linux-nfs, linux-rt-devel
In-Reply-To: <20251002081247.51255-1-byungchul@sk.com>

Resolved the following false positive by introducing explicit dept map
and annotations for dealing with this case:

   *** DEADLOCK ***
   context A
       [S] (unknown)(<sched>:0)
       [W] lock(&mm->mmap_lock:0)
       [E] try_to_wake_up(<sched>:0)

   context B
       [S] lock(&mm->mmap_lock:0)
       [W] mmu_interval_read_begin(<sched>:0)
       [E] unlock(&mm->mmap_lock:0)

   [S]: start of the event context
   [W]: the wait blocked
   [E]: the event not reachable

dept already tracks dependencies between scheduler sleep and ttwu based
on internal timestamp called wgen.  However, in case that more than one
event contexts are overwrapped, dept has chance to wrongly guess the
start of the event context like the following:

   <before this patch>

   context A: lock L
   context A: mmu_notifier_invalidate_range_start()

   context B: lock L'
   context B: mmu_interval_read_begin() : wait
   <- here is the start of the event context of C.
   context B: unlock L'

   context C: lock L''
   context C: mmu_notifier_invalidate_range_start()

   context A: mmu_notifier_invalidate_range_end()
   context A: unlock L

   context C: mmu_notifier_invalidate_range_end() : ttwu
   <- here is the end of the event context of C.  dept observes a wait,
      lock L'' within the event context of C.  Which causes a false
      positive dept report.

   context C: unlock L''

By explicitly annotating the interesting event context range, make dept
work with more precise information like:

   <after this patch>

   context A: lock L
   context A: mmu_notifier_invalidate_range_start()

   context B: lock L'
   context B: mmu_interval_read_begin() : wait
   context B: unlock L'

   context C: lock L''
   context C: mmu_notifier_invalidate_range_start()
   <- here is the start of the event context of C.

   context A: mmu_notifier_invalidate_range_end()
   context A: unlock L

   context C: mmu_notifier_invalidate_range_end() : ttwu
   <- here is the end of the event context of C.  dept doesn't observe
      the wait, lock L'' within the event context of C.  context C is
      responsible only for the range delimited by
      mmu_notifier_invalidate_range_{start,end}().

   context C: unlock L''

Signed-off-by: Byungchul Park <byungchul@sk.com>
---
 include/linux/mmu_notifier.h | 26 ++++++++++++++++++++++++++
 mm/mmu_notifier.c            | 31 +++++++++++++++++++++++++++++--
 2 files changed, 55 insertions(+), 2 deletions(-)

diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index d1094c2d5fb6..2b70dce149f0 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -428,6 +428,14 @@ static inline int mmu_notifier_test_young(struct mm_struct *mm,
 	return 0;
 }
 
+#ifdef CONFIG_DEPT
+void mmu_notifier_invalidate_dept_ecxt_start(struct mmu_notifier_range *range);
+void mmu_notifier_invalidate_dept_ecxt_end(struct mmu_notifier_range *range);
+#else
+static inline void mmu_notifier_invalidate_dept_ecxt_start(struct mmu_notifier_range *range) {}
+static inline void mmu_notifier_invalidate_dept_ecxt_end(struct mmu_notifier_range *range) {}
+#endif
+
 static inline void
 mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range)
 {
@@ -439,6 +447,12 @@ mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range)
 		__mmu_notifier_invalidate_range_start(range);
 	}
 	lock_map_release(&__mmu_notifier_invalidate_range_start_map);
+
+	/*
+	 * From now on, waiters could be there by this start until
+	 * mmu_notifier_invalidate_range_end().
+	 */
+	mmu_notifier_invalidate_dept_ecxt_start(range);
 }
 
 /*
@@ -459,6 +473,12 @@ mmu_notifier_invalidate_range_start_nonblock(struct mmu_notifier_range *range)
 		ret = __mmu_notifier_invalidate_range_start(range);
 	}
 	lock_map_release(&__mmu_notifier_invalidate_range_start_map);
+
+	/*
+	 * From now on, waiters could be there by this start until
+	 * mmu_notifier_invalidate_range_end().
+	 */
+	mmu_notifier_invalidate_dept_ecxt_start(range);
 	return ret;
 }
 
@@ -470,6 +490,12 @@ mmu_notifier_invalidate_range_end(struct mmu_notifier_range *range)
 
 	if (mm_has_notifiers(range->mm))
 		__mmu_notifier_invalidate_range_end(range);
+
+	/*
+	 * The event context that has been started by
+	 * mmu_notifier_invalidate_range_start() ends.
+	 */
+	mmu_notifier_invalidate_dept_ecxt_end(range);
 }
 
 static inline void mmu_notifier_arch_invalidate_secondary_tlbs(struct mm_struct *mm,
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 8e0125dc0522..31af5ea54a0c 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -46,6 +46,7 @@ struct mmu_notifier_subscriptions {
 	unsigned long active_invalidate_ranges;
 	struct rb_root_cached itree;
 	wait_queue_head_t wq;
+	struct dept_map dmap;
 	struct hlist_head deferred_list;
 };
 
@@ -165,6 +166,25 @@ static void mn_itree_inv_end(struct mmu_notifier_subscriptions *subscriptions)
 	wake_up_all(&subscriptions->wq);
 }
 
+#ifdef CONFIG_DEPT
+void mmu_notifier_invalidate_dept_ecxt_start(struct mmu_notifier_range *range)
+{
+	struct mmu_notifier_subscriptions *subscriptions =
+		range->mm->notifier_subscriptions;
+
+	if (subscriptions)
+		sdt_ecxt_enter(&subscriptions->dmap);
+}
+void mmu_notifier_invalidate_dept_ecxt_end(struct mmu_notifier_range *range)
+{
+	struct mmu_notifier_subscriptions *subscriptions =
+		range->mm->notifier_subscriptions;
+
+	if (subscriptions)
+		sdt_ecxt_exit(&subscriptions->dmap);
+}
+#endif
+
 /**
  * mmu_interval_read_begin - Begin a read side critical section against a VA
  *                           range
@@ -246,9 +266,12 @@ mmu_interval_read_begin(struct mmu_interval_notifier *interval_sub)
 	 */
 	lock_map_acquire(&__mmu_notifier_invalidate_range_start_map);
 	lock_map_release(&__mmu_notifier_invalidate_range_start_map);
-	if (is_invalidating)
+	if (is_invalidating) {
+		sdt_might_sleep_start(&subscriptions->dmap);
 		wait_event(subscriptions->wq,
 			   READ_ONCE(subscriptions->invalidate_seq) != seq);
+		sdt_might_sleep_end();
+	}
 
 	/*
 	 * Notice that mmu_interval_read_retry() can already be true at this
@@ -625,6 +648,7 @@ int __mmu_notifier_register(struct mmu_notifier *subscription,
 
 		INIT_HLIST_HEAD(&subscriptions->list);
 		spin_lock_init(&subscriptions->lock);
+		sdt_map_init(&subscriptions->dmap);
 		subscriptions->invalidate_seq = 2;
 		subscriptions->itree = RB_ROOT_CACHED;
 		init_waitqueue_head(&subscriptions->wq);
@@ -1070,9 +1094,12 @@ void mmu_interval_notifier_remove(struct mmu_interval_notifier *interval_sub)
 	 */
 	lock_map_acquire(&__mmu_notifier_invalidate_range_start_map);
 	lock_map_release(&__mmu_notifier_invalidate_range_start_map);
-	if (seq)
+	if (seq) {
+		sdt_might_sleep_start(&subscriptions->dmap);
 		wait_event(subscriptions->wq,
 			   mmu_interval_seq_released(subscriptions, seq));
+		sdt_might_sleep_end();
+	}
 
 	/* pairs with mmgrab in mmu_interval_notifier_insert() */
 	mmdrop(mm);
-- 
2.17.1


^ permalink raw reply related

* [PATCH v17 33/47] dept: make dept aware of lockdep_set_lock_cmp_fn() annotation
From: Byungchul Park @ 2025-10-02  8:12 UTC (permalink / raw)
  To: linux-kernel
  Cc: kernel_team, torvalds, damien.lemoal, linux-ide, adilger.kernel,
	linux-ext4, mingo, peterz, will, tglx, rostedt, joel, sashal,
	daniel.vetter, duyuyang, johannes.berg, tj, tytso, willy, david,
	amir73il, gregkh, kernel-team, linux-mm, akpm, mhocko, minchan,
	hannes, vdavydov.dev, sj, jglisse, dennis, cl, penberg, rientjes,
	vbabka, ngupta, linux-block, josef, linux-fsdevel, jack, jlayton,
	dan.j.williams, hch, djwong, dri-devel, rodrigosiqueiramelo,
	melissa.srw, hamohammed.sa, harry.yoo, chris.p.wilson,
	gwan-gyeong.mun, max.byungchul.park, boqun.feng, longman,
	yunseong.kim, ysk, yeoreum.yun, netdev, matthew.brost, her0gyugyu,
	corbet, catalin.marinas, bp, dave.hansen, x86, hpa, luto,
	sumit.semwal, gustavo, christian.koenig, andi.shyti, arnd,
	lorenzo.stoakes, Liam.Howlett, rppt, surenb, mcgrof, petr.pavlu,
	da.gomez, samitolvanen, paulmck, frederic, neeraj.upadhyay,
	joelagnelf, josh, urezki, mathieu.desnoyers, jiangshanlai,
	qiang.zhang, juri.lelli, vincent.guittot, dietmar.eggemann,
	bsegall, mgorman, vschneid, chuck.lever, neil, okorniev, Dai.Ngo,
	tom, trondmy, anna, kees, bigeasy, clrkwllms, mark.rutland,
	ada.coupriediaz, kristina.martsenko, wangkefeng.wang, broonie,
	kevin.brodsky, dwmw, shakeel.butt, ast, ziy, yuzhao, baolin.wang,
	usamaarif642, joel.granados, richard.weiyang, geert+renesas,
	tim.c.chen, linux, alexander.shishkin, lillian, chenhuacai,
	francesco, guoweikang.kernel, link, jpoimboe, masahiroy, brauner,
	thomas.weissschuh, oleg, mjguzik, andrii, wangfushuai, linux-doc,
	linux-arm-kernel, linux-media, linaro-mm-sig, linux-i2c,
	linux-arch, linux-modules, rcu, linux-nfs, linux-rt-devel
In-Reply-To: <20251002081247.51255-1-byungchul@sk.com>

commit eb1cfd09f788e ("lockdep: Add lock_set_cmp_fn() annotation") has
been added to address the issue that lockdep was not able to detect a
true deadlock like the following:

   https://lore.kernel.org/lkml/20220510232633.GA18445@X58A-UD3R/

The approach is only for lockdep but dept should work being aware of it
because the new annotation is already used to avoid false positive of
lockdep in the code.

Make dept aware of the new lockdep annotation.

Signed-off-by: Byungchul Park <byungchul@sk.com>
---
 include/linux/dept.h     | 10 +++++++++
 kernel/dependency/dept.c | 48 +++++++++++++++++++++++++++++++++++-----
 kernel/locking/lockdep.c |  1 +
 3 files changed, 53 insertions(+), 6 deletions(-)

diff --git a/include/linux/dept.h b/include/linux/dept.h
index b6dc4ff19537..8b4d97fc4627 100644
--- a/include/linux/dept.h
+++ b/include/linux/dept.h
@@ -130,6 +130,11 @@ struct dept_map {
 	const char			*name;
 	struct dept_key			*keys;
 
+	/*
+	 * keep lockdep map to handle lockdep_set_lock_cmp_fn().
+	 */
+	void				*lockdep_map;
+
 	/*
 	 * subclass that can be set from user
 	 */
@@ -156,6 +161,7 @@ struct dept_map {
 {									\
 	.name = #n,							\
 	.keys = (struct dept_key *)(k),					\
+	.lockdep_map = NULL,						\
 	.sub_u = 0,							\
 	.map_key = { .classes = { NULL, } },				\
 	.wgen = 0U,							\
@@ -427,6 +433,8 @@ extern void dept_softirqs_on_ip(unsigned long ip);
 extern void dept_hardirqs_on(void);
 extern void dept_softirqs_off(void);
 extern void dept_hardirqs_off(void);
+
+#define dept_set_lockdep_map(m, lockdep_m) ({ (m)->lockdep_map = lockdep_m; })
 #else /* !CONFIG_DEPT */
 struct dept_key { };
 struct dept_map { };
@@ -469,5 +477,7 @@ struct dept_ext_wgen { };
 #define dept_hardirqs_on()				do { } while (0)
 #define dept_softirqs_off()				do { } while (0)
 #define dept_hardirqs_off()				do { } while (0)
+
+#define dept_set_lockdep_map(m, lockdep_m)		do { } while (0)
 #endif
 #endif /* __LINUX_DEPT_H */
diff --git a/kernel/dependency/dept.c b/kernel/dependency/dept.c
index 1c6eb0a6d0a6..a0eb4b108de0 100644
--- a/kernel/dependency/dept.c
+++ b/kernel/dependency/dept.c
@@ -1615,9 +1615,39 @@ static int next_wgen(void)
 	return atomic_inc_return(&wgen) ?: atomic_inc_return(&wgen);
 }
 
-static void add_wait(struct dept_class *c, unsigned long ip,
-		     const char *w_fn, int sub_l, bool sched_sleep,
-		     bool timeout)
+/*
+ * XXX: This is a temporary patch needed until lockdep stops tracking
+ * dependency in wrong way.  lockdep has added an annotation to specify
+ * a callback to determin whether the given lock aquisition order is
+ * okay or not in its own way.  Even though dept is already working
+ * correctly with sub class on that issue, it needs to be aware of the
+ * annotation anyway.
+ */
+static bool lockdep_cmp_fn(struct dept_map *prev, struct dept_map *next)
+{
+	/*
+	 * Assumes the cmp_fn thing comes from struct lockdep_map.
+	 */
+	struct lockdep_map *p_lock = (struct lockdep_map *)prev->lockdep_map;
+	struct lockdep_map *n_lock = (struct lockdep_map *)next->lockdep_map;
+	struct lock_class *p_class = p_lock ? p_lock->class_cache[0] : NULL;
+	struct lock_class *n_class = n_lock ? n_lock->class_cache[0] : NULL;
+
+	if (!p_class || !n_class)
+		return false;
+
+	if (p_class != n_class)
+		return false;
+
+	if (!p_class->cmp_fn)
+		return false;
+
+	return p_class->cmp_fn(p_lock, n_lock) < 0;
+}
+
+static void add_wait(struct dept_map *m, struct dept_class *c,
+		unsigned long ip, const char *w_fn, int sub_l,
+		bool sched_sleep, bool timeout)
 {
 	struct dept_task *dt = dept_task();
 	struct dept_wait *w;
@@ -1658,8 +1688,13 @@ static void add_wait(struct dept_class *c, unsigned long ip,
 		if (!eh->ecxt)
 			continue;
 
-		if (eh->ecxt->class != c || eh->sub_l == sub_l)
-			add_dep(eh->ecxt, w);
+		if (eh->ecxt->class == c && eh->sub_l != sub_l)
+			continue;
+
+		if (i == dt->ecxt_held_pos - 1 && lockdep_cmp_fn(eh->map, m))
+			continue;
+
+		add_dep(eh->ecxt, w);
 	}
 
 	wg = next_wgen();
@@ -2145,6 +2180,7 @@ void dept_map_init(struct dept_map *m, struct dept_key *k, int sub_u,
 	m->name = n;
 	m->wgen = 0U;
 	m->nocheck = !valid_key(k);
+	m->lockdep_map = NULL;
 
 	dept_exit_recursive(flags);
 }
@@ -2366,7 +2402,7 @@ static void __dept_wait(struct dept_map *m, unsigned long w_f,
 		if (!c)
 			continue;
 
-		add_wait(c, ip, w_fn, sub_l, sched_sleep, timeout);
+		add_wait(m, c, ip, w_fn, sub_l, sched_sleep, timeout);
 	}
 }
 
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 39b9e3e27c0b..c99f91f7a54d 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -5035,6 +5035,7 @@ void lockdep_set_lock_cmp_fn(struct lockdep_map *lock, lock_cmp_fn cmp_fn,
 		class->print_fn = print_fn;
 	}
 
+	dept_set_lockdep_map(&lock->dmap, lock);
 	lockdep_recursion_finish();
 	raw_local_irq_restore(flags);
 }
-- 
2.17.1


^ permalink raw reply related

* [PATCH v17 30/47] fs/jbd2: use a weaker annotation in journal handling
From: Byungchul Park @ 2025-10-02  8:12 UTC (permalink / raw)
  To: linux-kernel
  Cc: kernel_team, torvalds, damien.lemoal, linux-ide, adilger.kernel,
	linux-ext4, mingo, peterz, will, tglx, rostedt, joel, sashal,
	daniel.vetter, duyuyang, johannes.berg, tj, tytso, willy, david,
	amir73il, gregkh, kernel-team, linux-mm, akpm, mhocko, minchan,
	hannes, vdavydov.dev, sj, jglisse, dennis, cl, penberg, rientjes,
	vbabka, ngupta, linux-block, josef, linux-fsdevel, jack, jlayton,
	dan.j.williams, hch, djwong, dri-devel, rodrigosiqueiramelo,
	melissa.srw, hamohammed.sa, harry.yoo, chris.p.wilson,
	gwan-gyeong.mun, max.byungchul.park, boqun.feng, longman,
	yunseong.kim, ysk, yeoreum.yun, netdev, matthew.brost, her0gyugyu,
	corbet, catalin.marinas, bp, dave.hansen, x86, hpa, luto,
	sumit.semwal, gustavo, christian.koenig, andi.shyti, arnd,
	lorenzo.stoakes, Liam.Howlett, rppt, surenb, mcgrof, petr.pavlu,
	da.gomez, samitolvanen, paulmck, frederic, neeraj.upadhyay,
	joelagnelf, josh, urezki, mathieu.desnoyers, jiangshanlai,
	qiang.zhang, juri.lelli, vincent.guittot, dietmar.eggemann,
	bsegall, mgorman, vschneid, chuck.lever, neil, okorniev, Dai.Ngo,
	tom, trondmy, anna, kees, bigeasy, clrkwllms, mark.rutland,
	ada.coupriediaz, kristina.martsenko, wangkefeng.wang, broonie,
	kevin.brodsky, dwmw, shakeel.butt, ast, ziy, yuzhao, baolin.wang,
	usamaarif642, joel.granados, richard.weiyang, geert+renesas,
	tim.c.chen, linux, alexander.shishkin, lillian, chenhuacai,
	francesco, guoweikang.kernel, link, jpoimboe, masahiroy, brauner,
	thomas.weissschuh, oleg, mjguzik, andrii, wangfushuai, linux-doc,
	linux-arm-kernel, linux-media, linaro-mm-sig, linux-i2c,
	linux-arch, linux-modules, rcu, linux-nfs, linux-rt-devel
In-Reply-To: <20251002081247.51255-1-byungchul@sk.com>

jbd2 journal handling code doesn't want jbd2_might_wait_for_commit()
to be placed between start_this_handle() and stop_this_handle().  So it
marks the region with rwsem_acquire_read() and rwsem_release().

However, the annotation is too strong for that purpose.  We don't have
to use more than try lock annotation for that.

rwsem_acquire_read() implies:

   1. might be a waiter on contention of the lock.
   2. enter to the critical section of the lock.

All we need in here is to act 2, not 1.  So trylock version of
annotation is sufficient for that purpose.  Now that dept partially
relies on lockdep annotaions, dept interpets rwsem_acquire_read() as a
potential wait and might report a deadlock by the wait.

Replace it with trylock version of annotation.

Signed-off-by: Byungchul Park <byungchul@sk.com>
---
 fs/jbd2/transaction.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index c7867139af69..b4e65f51bf5e 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -441,7 +441,7 @@ static int start_this_handle(journal_t *journal, handle_t *handle,
 	read_unlock(&journal->j_state_lock);
 	current->journal_info = handle;
 
-	rwsem_acquire_read(&journal->j_trans_commit_map, 0, 0, _THIS_IP_);
+	rwsem_acquire_read(&journal->j_trans_commit_map, 0, 1, _THIS_IP_);
 	jbd2_journal_free_transaction(new_transaction);
 	/*
 	 * Ensure that no allocations done while the transaction is open are
-- 
2.17.1


^ permalink raw reply related

* [PATCH v17 29/47] cpu/hotplug: use a weaker annotation in AP thread
From: Byungchul Park @ 2025-10-02  8:12 UTC (permalink / raw)
  To: linux-kernel
  Cc: kernel_team, torvalds, damien.lemoal, linux-ide, adilger.kernel,
	linux-ext4, mingo, peterz, will, tglx, rostedt, joel, sashal,
	daniel.vetter, duyuyang, johannes.berg, tj, tytso, willy, david,
	amir73il, gregkh, kernel-team, linux-mm, akpm, mhocko, minchan,
	hannes, vdavydov.dev, sj, jglisse, dennis, cl, penberg, rientjes,
	vbabka, ngupta, linux-block, josef, linux-fsdevel, jack, jlayton,
	dan.j.williams, hch, djwong, dri-devel, rodrigosiqueiramelo,
	melissa.srw, hamohammed.sa, harry.yoo, chris.p.wilson,
	gwan-gyeong.mun, max.byungchul.park, boqun.feng, longman,
	yunseong.kim, ysk, yeoreum.yun, netdev, matthew.brost, her0gyugyu,
	corbet, catalin.marinas, bp, dave.hansen, x86, hpa, luto,
	sumit.semwal, gustavo, christian.koenig, andi.shyti, arnd,
	lorenzo.stoakes, Liam.Howlett, rppt, surenb, mcgrof, petr.pavlu,
	da.gomez, samitolvanen, paulmck, frederic, neeraj.upadhyay,
	joelagnelf, josh, urezki, mathieu.desnoyers, jiangshanlai,
	qiang.zhang, juri.lelli, vincent.guittot, dietmar.eggemann,
	bsegall, mgorman, vschneid, chuck.lever, neil, okorniev, Dai.Ngo,
	tom, trondmy, anna, kees, bigeasy, clrkwllms, mark.rutland,
	ada.coupriediaz, kristina.martsenko, wangkefeng.wang, broonie,
	kevin.brodsky, dwmw, shakeel.butt, ast, ziy, yuzhao, baolin.wang,
	usamaarif642, joel.granados, richard.weiyang, geert+renesas,
	tim.c.chen, linux, alexander.shishkin, lillian, chenhuacai,
	francesco, guoweikang.kernel, link, jpoimboe, masahiroy, brauner,
	thomas.weissschuh, oleg, mjguzik, andrii, wangfushuai, linux-doc,
	linux-arm-kernel, linux-media, linaro-mm-sig, linux-i2c,
	linux-arch, linux-modules, rcu, linux-nfs, linux-rt-devel
In-Reply-To: <20251002081247.51255-1-byungchul@sk.com>

cb92173d1f0 ("locking/lockdep, cpu/hotplug: Annotate AP thread") was
introduced to make lockdep_assert_cpus_held() work in AP thread.

However, the annotation is too strong for that purpose.  We don't have
to use more than try lock annotation for that.

rwsem_acquire() implies:

   1. might be a waiter on contention of the lock.
   2. enter to the critical section of the lock.

All we need in here is to act 2, not 1.  So trylock version of
annotation is sufficient for that purpose.  Now that dept partially
relies on lockdep annotaions, dept interpets rwsem_acquire() as a
potential wait and might report a deadlock by the wait.

Replace it with trylock version of annotation.

Signed-off-by: Byungchul Park <byungchul@sk.com>
---
 kernel/cpu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/cpu.c b/kernel/cpu.c
index db9f6c539b28..285d7fa55523 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -538,7 +538,7 @@ int lockdep_is_cpus_held(void)
 
 static void lockdep_acquire_cpus_lock(void)
 {
-	rwsem_acquire(&cpu_hotplug_lock.dep_map, 0, 0, _THIS_IP_);
+	rwsem_acquire(&cpu_hotplug_lock.dep_map, 0, 1, _THIS_IP_);
 }
 
 static void lockdep_release_cpus_lock(void)
-- 
2.17.1


^ permalink raw reply related

* [PATCH v17 32/47] dept: assign unique dept_key to each distinct dma fence caller
From: Byungchul Park @ 2025-10-02  8:12 UTC (permalink / raw)
  To: linux-kernel
  Cc: kernel_team, torvalds, damien.lemoal, linux-ide, adilger.kernel,
	linux-ext4, mingo, peterz, will, tglx, rostedt, joel, sashal,
	daniel.vetter, duyuyang, johannes.berg, tj, tytso, willy, david,
	amir73il, gregkh, kernel-team, linux-mm, akpm, mhocko, minchan,
	hannes, vdavydov.dev, sj, jglisse, dennis, cl, penberg, rientjes,
	vbabka, ngupta, linux-block, josef, linux-fsdevel, jack, jlayton,
	dan.j.williams, hch, djwong, dri-devel, rodrigosiqueiramelo,
	melissa.srw, hamohammed.sa, harry.yoo, chris.p.wilson,
	gwan-gyeong.mun, max.byungchul.park, boqun.feng, longman,
	yunseong.kim, ysk, yeoreum.yun, netdev, matthew.brost, her0gyugyu,
	corbet, catalin.marinas, bp, dave.hansen, x86, hpa, luto,
	sumit.semwal, gustavo, christian.koenig, andi.shyti, arnd,
	lorenzo.stoakes, Liam.Howlett, rppt, surenb, mcgrof, petr.pavlu,
	da.gomez, samitolvanen, paulmck, frederic, neeraj.upadhyay,
	joelagnelf, josh, urezki, mathieu.desnoyers, jiangshanlai,
	qiang.zhang, juri.lelli, vincent.guittot, dietmar.eggemann,
	bsegall, mgorman, vschneid, chuck.lever, neil, okorniev, Dai.Ngo,
	tom, trondmy, anna, kees, bigeasy, clrkwllms, mark.rutland,
	ada.coupriediaz, kristina.martsenko, wangkefeng.wang, broonie,
	kevin.brodsky, dwmw, shakeel.butt, ast, ziy, yuzhao, baolin.wang,
	usamaarif642, joel.granados, richard.weiyang, geert+renesas,
	tim.c.chen, linux, alexander.shishkin, lillian, chenhuacai,
	francesco, guoweikang.kernel, link, jpoimboe, masahiroy, brauner,
	thomas.weissschuh, oleg, mjguzik, andrii, wangfushuai, linux-doc,
	linux-arm-kernel, linux-media, linaro-mm-sig, linux-i2c,
	linux-arch, linux-modules, rcu, linux-nfs, linux-rt-devel
In-Reply-To: <20251002081247.51255-1-byungchul@sk.com>

dma fence can be used at various points in the code and it's very hard
to distinguish dma fences between different usages.  Using a single
dept_key for all the dma fences could trigger false positive reports.

Assign unique dept_key to each distinct dma fence wait to avoid false
positive reports.

Signed-off-by: Byungchul Park <byungchul@sk.com>
---
 drivers/dma-buf/dma-fence.c | 18 ++++-----
 include/linux/dma-fence.h   | 74 +++++++++++++++++++++++++++++--------
 2 files changed, 68 insertions(+), 24 deletions(-)

diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index 0a4b519e3351..df52a7ae336a 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -481,7 +481,7 @@ int dma_fence_signal(struct dma_fence *fence)
 EXPORT_SYMBOL(dma_fence_signal);
 
 /**
- * dma_fence_wait_timeout - sleep until the fence gets signaled
+ * __dma_fence_wait_timeout - sleep until the fence gets signaled
  * or until timeout elapses
  * @fence: the fence to wait on
  * @intr: if true, do an interruptible wait
@@ -499,7 +499,7 @@ EXPORT_SYMBOL(dma_fence_signal);
  * See also dma_fence_wait() and dma_fence_wait_any_timeout().
  */
 signed long
-dma_fence_wait_timeout(struct dma_fence *fence, bool intr, signed long timeout)
+__dma_fence_wait_timeout(struct dma_fence *fence, bool intr, signed long timeout)
 {
 	signed long ret;
 
@@ -528,7 +528,7 @@ dma_fence_wait_timeout(struct dma_fence *fence, bool intr, signed long timeout)
 	}
 	return ret;
 }
-EXPORT_SYMBOL(dma_fence_wait_timeout);
+EXPORT_SYMBOL(__dma_fence_wait_timeout);
 
 /**
  * dma_fence_release - default release function for fences
@@ -764,7 +764,7 @@ dma_fence_default_wait_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
 }
 
 /**
- * dma_fence_default_wait - default sleep until the fence gets signaled
+ * __dma_fence_default_wait - default sleep until the fence gets signaled
  * or until timeout elapses
  * @fence: the fence to wait on
  * @intr: if true, do an interruptible wait
@@ -776,7 +776,7 @@ dma_fence_default_wait_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
  * functions taking a jiffies timeout.
  */
 signed long
-dma_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout)
+__dma_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout)
 {
 	struct default_wait_cb cb;
 	unsigned long flags;
@@ -825,7 +825,7 @@ dma_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout)
 	spin_unlock_irqrestore(fence->lock, flags);
 	return ret;
 }
-EXPORT_SYMBOL(dma_fence_default_wait);
+EXPORT_SYMBOL(__dma_fence_default_wait);
 
 static bool
 dma_fence_test_signaled_any(struct dma_fence **fences, uint32_t count,
@@ -845,7 +845,7 @@ dma_fence_test_signaled_any(struct dma_fence **fences, uint32_t count,
 }
 
 /**
- * dma_fence_wait_any_timeout - sleep until any fence gets signaled
+ * __dma_fence_wait_any_timeout - sleep until any fence gets signaled
  * or until timeout elapses
  * @fences: array of fences to wait on
  * @count: number of fences to wait on
@@ -865,7 +865,7 @@ dma_fence_test_signaled_any(struct dma_fence **fences, uint32_t count,
  * See also dma_fence_wait() and dma_fence_wait_timeout().
  */
 signed long
-dma_fence_wait_any_timeout(struct dma_fence **fences, uint32_t count,
+__dma_fence_wait_any_timeout(struct dma_fence **fences, uint32_t count,
 			   bool intr, signed long timeout, uint32_t *idx)
 {
 	struct default_wait_cb *cb;
@@ -933,7 +933,7 @@ dma_fence_wait_any_timeout(struct dma_fence **fences, uint32_t count,
 
 	return ret;
 }
-EXPORT_SYMBOL(dma_fence_wait_any_timeout);
+EXPORT_SYMBOL(__dma_fence_wait_any_timeout);
 
 /**
  * DOC: deadline hints
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index 64639e104110..1062fbb637e5 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -369,8 +369,22 @@ int dma_fence_signal_locked(struct dma_fence *fence);
 int dma_fence_signal_timestamp(struct dma_fence *fence, ktime_t timestamp);
 int dma_fence_signal_timestamp_locked(struct dma_fence *fence,
 				      ktime_t timestamp);
-signed long dma_fence_default_wait(struct dma_fence *fence,
+signed long __dma_fence_default_wait(struct dma_fence *fence,
 				   bool intr, signed long timeout);
+
+/*
+ * Associate every caller with its own dept map.
+ */
+#define dma_fence_default_wait(f, intr, t)				\
+({									\
+	signed long __ret;						\
+									\
+	sdt_might_sleep_start_timeout(NULL, t);				\
+	__ret = __dma_fence_default_wait(f, intr, t);			\
+	sdt_might_sleep_end();						\
+	__ret;								\
+})
+
 int dma_fence_add_callback(struct dma_fence *fence,
 			   struct dma_fence_cb *cb,
 			   dma_fence_func_t func);
@@ -607,12 +621,37 @@ static inline ktime_t dma_fence_timestamp(struct dma_fence *fence)
 	return fence->timestamp;
 }
 
-signed long dma_fence_wait_timeout(struct dma_fence *,
+signed long __dma_fence_wait_timeout(struct dma_fence *,
 				   bool intr, signed long timeout);
-signed long dma_fence_wait_any_timeout(struct dma_fence **fences,
+signed long __dma_fence_wait_any_timeout(struct dma_fence **fences,
 				       uint32_t count,
 				       bool intr, signed long timeout,
 				       uint32_t *idx);
+/*
+ * Associate every caller with its own dept map.
+ */
+#define dma_fence_wait_timeout(f, intr, t)				\
+({									\
+	signed long __ret;						\
+									\
+	sdt_might_sleep_start_timeout(NULL, t);				\
+	__ret = __dma_fence_wait_timeout(f, intr, t);			\
+	sdt_might_sleep_end();						\
+	__ret;								\
+})
+
+/*
+ * Associate every caller with its own dept map.
+ */
+#define dma_fence_wait_any_timeout(fpp, count, intr, t, idx)		\
+({									\
+	signed long __ret;						\
+									\
+	sdt_might_sleep_start_timeout(NULL, t);				\
+	__ret = __dma_fence_wait_any_timeout(fpp, count, intr, t, idx);	\
+	sdt_might_sleep_end();						\
+	__ret;								\
+})
 
 /**
  * dma_fence_wait - sleep until the fence gets signaled
@@ -628,19 +667,24 @@ signed long dma_fence_wait_any_timeout(struct dma_fence **fences,
  * fence might be freed before return, resulting in undefined behavior.
  *
  * See also dma_fence_wait_timeout() and dma_fence_wait_any_timeout().
+ *
+ * Associate every caller with its own dept map.
  */
-static inline signed long dma_fence_wait(struct dma_fence *fence, bool intr)
-{
-	signed long ret;
-
-	/* Since dma_fence_wait_timeout cannot timeout with
-	 * MAX_SCHEDULE_TIMEOUT, only valid return values are
-	 * -ERESTARTSYS and MAX_SCHEDULE_TIMEOUT.
-	 */
-	ret = dma_fence_wait_timeout(fence, intr, MAX_SCHEDULE_TIMEOUT);
-
-	return ret < 0 ? ret : 0;
-}
+#define dma_fence_wait(f, intr)						\
+({									\
+	signed long __ret;						\
+									\
+	sdt_might_sleep_start_timeout(NULL, MAX_SCHEDULE_TIMEOUT);	\
+	__ret = __dma_fence_wait_timeout(f, intr, MAX_SCHEDULE_TIMEOUT);\
+	sdt_might_sleep_end();						\
+									\
+	/*								\
+	 * Since dma_fence_wait_timeout cannot timeout with		\
+	 * MAX_SCHEDULE_TIMEOUT, only valid return values are		\
+	 * -ERESTARTSYS and MAX_SCHEDULE_TIMEOUT.			\
+	 */								\
+	__ret < 0 ? __ret : 0;						\
+})
 
 void dma_fence_set_deadline(struct dma_fence *fence, ktime_t deadline);
 
-- 
2.17.1


^ permalink raw reply related

* [PATCH v17 27/47] locking/lockdep: prevent various lockdep assertions when lockdep_off()'ed
From: Byungchul Park @ 2025-10-02  8:12 UTC (permalink / raw)
  To: linux-kernel
  Cc: kernel_team, torvalds, damien.lemoal, linux-ide, adilger.kernel,
	linux-ext4, mingo, peterz, will, tglx, rostedt, joel, sashal,
	daniel.vetter, duyuyang, johannes.berg, tj, tytso, willy, david,
	amir73il, gregkh, kernel-team, linux-mm, akpm, mhocko, minchan,
	hannes, vdavydov.dev, sj, jglisse, dennis, cl, penberg, rientjes,
	vbabka, ngupta, linux-block, josef, linux-fsdevel, jack, jlayton,
	dan.j.williams, hch, djwong, dri-devel, rodrigosiqueiramelo,
	melissa.srw, hamohammed.sa, harry.yoo, chris.p.wilson,
	gwan-gyeong.mun, max.byungchul.park, boqun.feng, longman,
	yunseong.kim, ysk, yeoreum.yun, netdev, matthew.brost, her0gyugyu,
	corbet, catalin.marinas, bp, dave.hansen, x86, hpa, luto,
	sumit.semwal, gustavo, christian.koenig, andi.shyti, arnd,
	lorenzo.stoakes, Liam.Howlett, rppt, surenb, mcgrof, petr.pavlu,
	da.gomez, samitolvanen, paulmck, frederic, neeraj.upadhyay,
	joelagnelf, josh, urezki, mathieu.desnoyers, jiangshanlai,
	qiang.zhang, juri.lelli, vincent.guittot, dietmar.eggemann,
	bsegall, mgorman, vschneid, chuck.lever, neil, okorniev, Dai.Ngo,
	tom, trondmy, anna, kees, bigeasy, clrkwllms, mark.rutland,
	ada.coupriediaz, kristina.martsenko, wangkefeng.wang, broonie,
	kevin.brodsky, dwmw, shakeel.butt, ast, ziy, yuzhao, baolin.wang,
	usamaarif642, joel.granados, richard.weiyang, geert+renesas,
	tim.c.chen, linux, alexander.shishkin, lillian, chenhuacai,
	francesco, guoweikang.kernel, link, jpoimboe, masahiroy, brauner,
	thomas.weissschuh, oleg, mjguzik, andrii, wangfushuai, linux-doc,
	linux-arm-kernel, linux-media, linaro-mm-sig, linux-i2c,
	linux-arch, linux-modules, rcu, linux-nfs, linux-rt-devel
In-Reply-To: <20251002081247.51255-1-byungchul@sk.com>

lockdep provides APIs for assertion only if lockdep is enabled at the
moment asserting to avoid unnecessary confusion, using the following
condition, debug_locks && !this_cpu_read(lockdep_recursion).

However, lockdep_{off,on}() are also used for disabling and enabling
lockdep for a simular purpose.  Add !lockdep_recursing(current) that is
updated by lockdep_{off,on}() to the condition so that the assertions
are aware of !__lockdep_enabled if lockdep_off()'ed.

Signed-off-by: Byungchul Park <byungchul@sk.com>
---
 include/linux/lockdep.h  |  3 ++-
 kernel/locking/lockdep.c | 10 ++++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index ef03d8808c10..c83fe95199db 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -303,6 +303,7 @@ extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie);
 	lockdep_assert_once(!current->lockdep_depth)
 
 #define lockdep_recursing(tsk)	((tsk)->lockdep_recursion)
+extern bool lockdep_recursing_current(void);
 
 #define lockdep_pin_lock(l)	lock_pin_lock(&(l)->dep_map)
 #define lockdep_repin_lock(l,c)	lock_repin_lock(&(l)->dep_map, (c))
@@ -630,7 +631,7 @@ DECLARE_PER_CPU(int, hardirqs_enabled);
 DECLARE_PER_CPU(int, hardirq_context);
 DECLARE_PER_CPU(unsigned int, lockdep_recursion);
 
-#define __lockdep_enabled	(debug_locks && !this_cpu_read(lockdep_recursion))
+#define __lockdep_enabled	(debug_locks && !this_cpu_read(lockdep_recursion) && !lockdep_recursing_current())
 
 #define lockdep_assert_irqs_enabled()					\
 do {									\
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index dc97f2753ef8..39b9e3e27c0b 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -6900,3 +6900,13 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
 	warn_rcu_exit(rcu);
 }
 EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious);
+
+/*
+ * For avoiding header dependency when using (struct task_struct *)current
+ * and lockdep_recursing() at the same time.
+ */
+noinstr bool lockdep_recursing_current(void)
+{
+	return lockdep_recursing(current);
+}
+EXPORT_SYMBOL_GPL(lockdep_recursing_current);
-- 
2.17.1


^ permalink raw reply related

* [PATCH v17 25/47] dept: track PG_locked with dept
From: Byungchul Park @ 2025-10-02  8:12 UTC (permalink / raw)
  To: linux-kernel
  Cc: kernel_team, torvalds, damien.lemoal, linux-ide, adilger.kernel,
	linux-ext4, mingo, peterz, will, tglx, rostedt, joel, sashal,
	daniel.vetter, duyuyang, johannes.berg, tj, tytso, willy, david,
	amir73il, gregkh, kernel-team, linux-mm, akpm, mhocko, minchan,
	hannes, vdavydov.dev, sj, jglisse, dennis, cl, penberg, rientjes,
	vbabka, ngupta, linux-block, josef, linux-fsdevel, jack, jlayton,
	dan.j.williams, hch, djwong, dri-devel, rodrigosiqueiramelo,
	melissa.srw, hamohammed.sa, harry.yoo, chris.p.wilson,
	gwan-gyeong.mun, max.byungchul.park, boqun.feng, longman,
	yunseong.kim, ysk, yeoreum.yun, netdev, matthew.brost, her0gyugyu,
	corbet, catalin.marinas, bp, dave.hansen, x86, hpa, luto,
	sumit.semwal, gustavo, christian.koenig, andi.shyti, arnd,
	lorenzo.stoakes, Liam.Howlett, rppt, surenb, mcgrof, petr.pavlu,
	da.gomez, samitolvanen, paulmck, frederic, neeraj.upadhyay,
	joelagnelf, josh, urezki, mathieu.desnoyers, jiangshanlai,
	qiang.zhang, juri.lelli, vincent.guittot, dietmar.eggemann,
	bsegall, mgorman, vschneid, chuck.lever, neil, okorniev, Dai.Ngo,
	tom, trondmy, anna, kees, bigeasy, clrkwllms, mark.rutland,
	ada.coupriediaz, kristina.martsenko, wangkefeng.wang, broonie,
	kevin.brodsky, dwmw, shakeel.butt, ast, ziy, yuzhao, baolin.wang,
	usamaarif642, joel.granados, richard.weiyang, geert+renesas,
	tim.c.chen, linux, alexander.shishkin, lillian, chenhuacai,
	francesco, guoweikang.kernel, link, jpoimboe, masahiroy, brauner,
	thomas.weissschuh, oleg, mjguzik, andrii, wangfushuai, linux-doc,
	linux-arm-kernel, linux-media, linaro-mm-sig, linux-i2c,
	linux-arch, linux-modules, rcu, linux-nfs, linux-rt-devel
In-Reply-To: <20251002081247.51255-1-byungchul@sk.com>

Makes dept able to track PG_locked waits and events, which will be
useful in practice.  See the following link that shows dept worked with
PG_locked and detected real issues in practice:

   https://lore.kernel.org/lkml/1674268856-31807-1-git-send-email-byungchul.park@lge.com/

Signed-off-by: Byungchul Park <byungchul@sk.com>
---
 include/linux/mm_types.h   |   2 +
 include/linux/page-flags.h | 125 +++++++++++++++++++++++++++++++++----
 include/linux/pagemap.h    |  37 ++++++++++-
 mm/filemap.c               |  26 ++++++++
 mm/mm_init.c               |   2 +
 5 files changed, 179 insertions(+), 13 deletions(-)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index a643fae8a349..5ebc565309af 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -20,6 +20,7 @@
 #include <linux/seqlock.h>
 #include <linux/percpu_counter.h>
 #include <linux/types.h>
+#include <linux/dept.h>
 
 #include <asm/mmu.h>
 
@@ -223,6 +224,7 @@ struct page {
 	struct page *kmsan_shadow;
 	struct page *kmsan_origin;
 #endif
+	struct dept_ext_wgen pg_locked_wgen;
 } _struct_page_alignment;
 
 /*
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 8d3fa3a91ce4..d3c4954c4218 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -198,6 +198,61 @@ enum pageflags {
 
 #ifndef __GENERATING_BOUNDS_H
 
+#ifdef CONFIG_DEPT
+#include <linux/kernel.h>
+#include <linux/dept.h>
+
+extern struct dept_map pg_locked_map;
+
+/*
+ * Place the following annotations in its suitable point in code:
+ *
+ *	Annotate dept_page_set_bit() around firstly set_bit*()
+ *	Annotate dept_page_clear_bit() around clear_bit*()
+ *	Annotate dept_page_wait_on_bit() around wait_on_bit*()
+ */
+
+static inline void dept_page_set_bit(struct page *p, int bit_nr)
+{
+	if (bit_nr == PG_locked)
+		dept_request_event(&pg_locked_map, &p->pg_locked_wgen);
+}
+
+static inline void dept_page_clear_bit(struct page *p, int bit_nr)
+{
+	if (bit_nr == PG_locked)
+		dept_event(&pg_locked_map, 1UL, _RET_IP_, __func__, &p->pg_locked_wgen);
+}
+
+static inline void dept_page_wait_on_bit(struct page *p, int bit_nr)
+{
+	if (bit_nr == PG_locked)
+		dept_wait(&pg_locked_map, 1UL, _RET_IP_, __func__, 0, -1L);
+}
+
+static inline void dept_folio_set_bit(struct folio *f, int bit_nr)
+{
+	dept_page_set_bit(&f->page, bit_nr);
+}
+
+static inline void dept_folio_clear_bit(struct folio *f, int bit_nr)
+{
+	dept_page_clear_bit(&f->page, bit_nr);
+}
+
+static inline void dept_folio_wait_on_bit(struct folio *f, int bit_nr)
+{
+	dept_page_wait_on_bit(&f->page, bit_nr);
+}
+#else
+#define dept_page_set_bit(p, bit_nr)		do { } while (0)
+#define dept_page_clear_bit(p, bit_nr)		do { } while (0)
+#define dept_page_wait_on_bit(p, bit_nr)	do { } while (0)
+#define dept_folio_set_bit(f, bit_nr)		do { } while (0)
+#define dept_folio_clear_bit(f, bit_nr)		do { } while (0)
+#define dept_folio_wait_on_bit(f, bit_nr)	do { } while (0)
+#endif
+
 #ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
 DECLARE_STATIC_KEY_FALSE(hugetlb_optimize_vmemmap_key);
 
@@ -419,27 +474,51 @@ static __always_inline bool folio_test_##name(const struct folio *folio) \
 
 #define FOLIO_SET_FLAG(name, page)					\
 static __always_inline void folio_set_##name(struct folio *folio)	\
-{ set_bit(PG_##name, folio_flags(folio, page)); }
+{									\
+	set_bit(PG_##name, folio_flags(folio, page));			\
+	dept_folio_set_bit(folio, PG_##name);				\
+}
 
 #define FOLIO_CLEAR_FLAG(name, page)					\
 static __always_inline void folio_clear_##name(struct folio *folio)	\
-{ clear_bit(PG_##name, folio_flags(folio, page)); }
+{									\
+	clear_bit(PG_##name, folio_flags(folio, page));			\
+	dept_folio_clear_bit(folio, PG_##name);				\
+}
 
 #define __FOLIO_SET_FLAG(name, page)					\
 static __always_inline void __folio_set_##name(struct folio *folio)	\
-{ __set_bit(PG_##name, folio_flags(folio, page)); }
+{									\
+	__set_bit(PG_##name, folio_flags(folio, page));			\
+	dept_folio_set_bit(folio, PG_##name);				\
+}
 
 #define __FOLIO_CLEAR_FLAG(name, page)					\
 static __always_inline void __folio_clear_##name(struct folio *folio)	\
-{ __clear_bit(PG_##name, folio_flags(folio, page)); }
+{									\
+	__clear_bit(PG_##name, folio_flags(folio, page));		\
+	dept_folio_clear_bit(folio, PG_##name);				\
+}
 
 #define FOLIO_TEST_SET_FLAG(name, page)					\
 static __always_inline bool folio_test_set_##name(struct folio *folio)	\
-{ return test_and_set_bit(PG_##name, folio_flags(folio, page)); }
+{									\
+	bool __ret = test_and_set_bit(PG_##name, folio_flags(folio, page)); \
+									\
+	if (!__ret)							\
+		dept_folio_set_bit(folio, PG_##name);			\
+	return __ret;							\
+}
 
 #define FOLIO_TEST_CLEAR_FLAG(name, page)				\
 static __always_inline bool folio_test_clear_##name(struct folio *folio) \
-{ return test_and_clear_bit(PG_##name, folio_flags(folio, page)); }
+{									\
+	bool __ret = test_and_clear_bit(PG_##name, folio_flags(folio, page)); \
+									\
+	if (__ret)							\
+		dept_folio_clear_bit(folio, PG_##name);			\
+	return __ret;							\
+}
 
 #define FOLIO_FLAG(name, page)						\
 FOLIO_TEST_FLAG(name, page)						\
@@ -454,32 +533,54 @@ static __always_inline int Page##uname(const struct page *page)		\
 #define SETPAGEFLAG(uname, lname, policy)				\
 FOLIO_SET_FLAG(lname, FOLIO_##policy)					\
 static __always_inline void SetPage##uname(struct page *page)		\
-{ set_bit(PG_##lname, &policy(page, 1)->flags); }
+{									\
+	set_bit(PG_##lname, &policy(page, 1)->flags);			\
+	dept_page_set_bit(page, PG_##lname);				\
+}
 
 #define CLEARPAGEFLAG(uname, lname, policy)				\
 FOLIO_CLEAR_FLAG(lname, FOLIO_##policy)					\
 static __always_inline void ClearPage##uname(struct page *page)		\
-{ clear_bit(PG_##lname, &policy(page, 1)->flags); }
+{									\
+	clear_bit(PG_##lname, &policy(page, 1)->flags);			\
+	dept_page_clear_bit(page, PG_##lname);				\
+}
 
 #define __SETPAGEFLAG(uname, lname, policy)				\
 __FOLIO_SET_FLAG(lname, FOLIO_##policy)					\
 static __always_inline void __SetPage##uname(struct page *page)		\
-{ __set_bit(PG_##lname, &policy(page, 1)->flags); }
+{									\
+	__set_bit(PG_##lname, &policy(page, 1)->flags);			\
+	dept_page_set_bit(page, PG_##lname);				\
+}
 
 #define __CLEARPAGEFLAG(uname, lname, policy)				\
 __FOLIO_CLEAR_FLAG(lname, FOLIO_##policy)				\
 static __always_inline void __ClearPage##uname(struct page *page)	\
-{ __clear_bit(PG_##lname, &policy(page, 1)->flags); }
+{									\
+	__clear_bit(PG_##lname, &policy(page, 1)->flags);		\
+	dept_page_clear_bit(page, PG_##lname);				\
+}
 
 #define TESTSETFLAG(uname, lname, policy)				\
 FOLIO_TEST_SET_FLAG(lname, FOLIO_##policy)				\
 static __always_inline int TestSetPage##uname(struct page *page)	\
-{ return test_and_set_bit(PG_##lname, &policy(page, 1)->flags); }
+{									\
+	bool ret = test_and_set_bit(PG_##lname, &policy(page, 1)->flags);\
+	if (!ret)							\
+		dept_page_set_bit(page, PG_##lname);			\
+	return ret;							\
+}
 
 #define TESTCLEARFLAG(uname, lname, policy)				\
 FOLIO_TEST_CLEAR_FLAG(lname, FOLIO_##policy)				\
 static __always_inline int TestClearPage##uname(struct page *page)	\
-{ return test_and_clear_bit(PG_##lname, &policy(page, 1)->flags); }
+{									\
+	bool ret = test_and_clear_bit(PG_##lname, &policy(page, 1)->flags);\
+	if (ret)							\
+		dept_page_clear_bit(page, PG_##lname);			\
+	return ret;							\
+}
 
 #define PAGEFLAG(uname, lname, policy)					\
 	TESTPAGEFLAG(uname, lname, policy)				\
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 12a12dae727d..53b68b7a3f17 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -1093,7 +1093,12 @@ void folio_unlock(struct folio *folio);
  */
 static inline bool folio_trylock(struct folio *folio)
 {
-	return likely(!test_and_set_bit_lock(PG_locked, folio_flags(folio, 0)));
+	bool ret = !test_and_set_bit_lock(PG_locked, folio_flags(folio, 0));
+
+	if (ret)
+		dept_page_set_bit(&folio->page, PG_locked);
+
+	return likely(ret);
 }
 
 /*
@@ -1129,6 +1134,16 @@ static inline bool trylock_page(struct page *page)
 static inline void folio_lock(struct folio *folio)
 {
 	might_sleep();
+
+	/*
+	 * dept_page_wait_on_bit() will be called if __folio_lock() goes
+	 * through a real wait path.  However, for better job to detect
+	 * *potential* deadlocks, let's assume that folio_lock() always
+	 * goes through wait so that dept can take into account all the
+	 * potential cases.
+	 */
+	dept_page_wait_on_bit(&folio->page, PG_locked);
+
 	if (!folio_trylock(folio))
 		__folio_lock(folio);
 }
@@ -1149,6 +1164,15 @@ static inline void lock_page(struct page *page)
 	struct folio *folio;
 	might_sleep();
 
+	/*
+	 * dept_page_wait_on_bit() will be called if __folio_lock() goes
+	 * through a real wait path.  However, for better job to detect
+	 * *potential* deadlocks, let's assume that lock_page() always
+	 * goes through wait so that dept can take into account all the
+	 * potential cases.
+	 */
+	dept_page_wait_on_bit(page, PG_locked);
+
 	folio = page_folio(page);
 	if (!folio_trylock(folio))
 		__folio_lock(folio);
@@ -1167,6 +1191,17 @@ static inline void lock_page(struct page *page)
 static inline int folio_lock_killable(struct folio *folio)
 {
 	might_sleep();
+
+	/*
+	 * dept_page_wait_on_bit() will be called if
+	 * __folio_lock_killable() goes through a real wait path.
+	 * However, for better job to detect *potential* deadlocks,
+	 * let's assume that folio_lock_killable() always goes through
+	 * wait so that dept can take into account all the potential
+	 * cases.
+	 */
+	dept_page_wait_on_bit(&folio->page, PG_locked);
+
 	if (!folio_trylock(folio))
 		return __folio_lock_killable(folio);
 	return 0;
diff --git a/mm/filemap.c b/mm/filemap.c
index 751838ef05e5..edb0710ddb3f 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -48,6 +48,7 @@
 #include <linux/rcupdate_wait.h>
 #include <linux/sched/mm.h>
 #include <linux/sysctl.h>
+#include <linux/dept.h>
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 #include "internal.h"
@@ -1145,6 +1146,7 @@ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync,
 		if (flags & WQ_FLAG_CUSTOM) {
 			if (test_and_set_bit(key->bit_nr, &key->folio->flags))
 				return -1;
+			dept_page_set_bit(&key->folio->page, key->bit_nr);
 			flags |= WQ_FLAG_DONE;
 		}
 	}
@@ -1228,6 +1230,7 @@ static inline bool folio_trylock_flag(struct folio *folio, int bit_nr,
 	if (wait->flags & WQ_FLAG_EXCLUSIVE) {
 		if (test_and_set_bit(bit_nr, &folio->flags))
 			return false;
+		dept_page_set_bit(&folio->page, bit_nr);
 	} else if (test_bit(bit_nr, &folio->flags))
 		return false;
 
@@ -1235,6 +1238,9 @@ static inline bool folio_trylock_flag(struct folio *folio, int bit_nr,
 	return true;
 }
 
+struct dept_map __maybe_unused pg_locked_map = DEPT_MAP_INITIALIZER(pg_locked_map, NULL);
+EXPORT_SYMBOL(pg_locked_map);
+
 static inline int folio_wait_bit_common(struct folio *folio, int bit_nr,
 		int state, enum behavior behavior)
 {
@@ -1246,6 +1252,8 @@ static inline int folio_wait_bit_common(struct folio *folio, int bit_nr,
 	unsigned long pflags;
 	bool in_thrashing;
 
+	dept_page_wait_on_bit(&folio->page, bit_nr);
+
 	if (bit_nr == PG_locked &&
 	    !folio_test_uptodate(folio) && folio_test_workingset(folio)) {
 		delayacct_thrashing_start(&in_thrashing);
@@ -1339,6 +1347,23 @@ static inline int folio_wait_bit_common(struct folio *folio, int bit_nr,
 		break;
 	}
 
+	/*
+	 * dept_page_set_bit() might have been called already in
+	 * folio_trylock_flag(), wake_page_function() or somewhere.
+	 * However, call it again to reset the wgen of dept to ensure
+	 * dept_page_wait_on_bit() is called prior to
+	 * dept_page_set_bit().
+	 *
+	 * Remind dept considers all the waits between
+	 * dept_page_set_bit() and dept_page_clear_bit() as potential
+	 * event disturbers. Ensure the correct sequence so that dept
+	 * can make correct decisions:
+	 *
+	 *	wait -> acquire(set bit) -> release(clear bit)
+	 */
+	if (wait->flags & WQ_FLAG_DONE)
+		dept_page_set_bit(&folio->page, bit_nr);
+
 	/*
 	 * If a signal happened, this 'finish_wait()' may remove the last
 	 * waiter from the wait-queues, but the folio waiters bit will remain
@@ -1496,6 +1521,7 @@ void folio_unlock(struct folio *folio)
 	BUILD_BUG_ON(PG_waiters != 7);
 	BUILD_BUG_ON(PG_locked > 7);
 	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
+	dept_page_clear_bit(&folio->page, PG_locked);
 	if (folio_xor_flags_has_waiters(folio, 1 << PG_locked))
 		folio_wake_bit(folio, PG_locked);
 }
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 5c21b3af216b..09e4ac6a73c7 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -32,6 +32,7 @@
 #include <linux/vmstat.h>
 #include <linux/kexec_handover.h>
 #include <linux/hugetlb.h>
+#include <linux/dept.h>
 #include "internal.h"
 #include "slab.h"
 #include "shuffle.h"
@@ -587,6 +588,7 @@ void __meminit __init_single_page(struct page *page, unsigned long pfn,
 	atomic_set(&page->_mapcount, -1);
 	page_cpupid_reset_last(page);
 	page_kasan_tag_reset(page);
+	dept_ext_wgen_init(&page->pg_locked_wgen);
 
 	INIT_LIST_HEAD(&page->lru);
 #ifdef WANT_PAGE_VIRTUAL
-- 
2.17.1


^ permalink raw reply related

* [PATCH v17 26/47] dept: print staged wait's stacktrace on report
From: Byungchul Park @ 2025-10-02  8:12 UTC (permalink / raw)
  To: linux-kernel
  Cc: kernel_team, torvalds, damien.lemoal, linux-ide, adilger.kernel,
	linux-ext4, mingo, peterz, will, tglx, rostedt, joel, sashal,
	daniel.vetter, duyuyang, johannes.berg, tj, tytso, willy, david,
	amir73il, gregkh, kernel-team, linux-mm, akpm, mhocko, minchan,
	hannes, vdavydov.dev, sj, jglisse, dennis, cl, penberg, rientjes,
	vbabka, ngupta, linux-block, josef, linux-fsdevel, jack, jlayton,
	dan.j.williams, hch, djwong, dri-devel, rodrigosiqueiramelo,
	melissa.srw, hamohammed.sa, harry.yoo, chris.p.wilson,
	gwan-gyeong.mun, max.byungchul.park, boqun.feng, longman,
	yunseong.kim, ysk, yeoreum.yun, netdev, matthew.brost, her0gyugyu,
	corbet, catalin.marinas, bp, dave.hansen, x86, hpa, luto,
	sumit.semwal, gustavo, christian.koenig, andi.shyti, arnd,
	lorenzo.stoakes, Liam.Howlett, rppt, surenb, mcgrof, petr.pavlu,
	da.gomez, samitolvanen, paulmck, frederic, neeraj.upadhyay,
	joelagnelf, josh, urezki, mathieu.desnoyers, jiangshanlai,
	qiang.zhang, juri.lelli, vincent.guittot, dietmar.eggemann,
	bsegall, mgorman, vschneid, chuck.lever, neil, okorniev, Dai.Ngo,
	tom, trondmy, anna, kees, bigeasy, clrkwllms, mark.rutland,
	ada.coupriediaz, kristina.martsenko, wangkefeng.wang, broonie,
	kevin.brodsky, dwmw, shakeel.butt, ast, ziy, yuzhao, baolin.wang,
	usamaarif642, joel.granados, richard.weiyang, geert+renesas,
	tim.c.chen, linux, alexander.shishkin, lillian, chenhuacai,
	francesco, guoweikang.kernel, link, jpoimboe, masahiroy, brauner,
	thomas.weissschuh, oleg, mjguzik, andrii, wangfushuai, linux-doc,
	linux-arm-kernel, linux-media, linaro-mm-sig, linux-i2c,
	linux-arch, linux-modules, rcu, linux-nfs, linux-rt-devel
In-Reply-To: <20251002081247.51255-1-byungchul@sk.com>

Currently, print nothing about what event wakes up in report.  However,
it makes hard to interpret dept's report.

Make it print wait's stacktrace that the event wakes up.

Signed-off-by: Byungchul Park <byungchul@sk.com>
---
 include/linux/dept.h     |  5 ++++
 include/linux/sched.h    |  2 ++
 kernel/dependency/dept.c | 59 ++++++++++++++++++++++++++++++++++------
 3 files changed, 57 insertions(+), 9 deletions(-)

diff --git a/include/linux/dept.h b/include/linux/dept.h
index 236e4f06e5c8..b6dc4ff19537 100644
--- a/include/linux/dept.h
+++ b/include/linux/dept.h
@@ -227,6 +227,11 @@ struct dept_ecxt {
 			 */
 			unsigned long	event_ip;
 			struct dept_stack *event_stack;
+
+			/*
+			 * wait that this event ttwu
+			 */
+			struct dept_stack *ewait_stack;
 		};
 	};
 };
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 95b1450f22fa..a01c10f28dfd 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -868,6 +868,7 @@ struct dept_task {
 	const char			*stage_w_fn;
 	unsigned long			stage_ip;
 	bool				stage_timeout;
+	struct dept_stack		*stage_wait_stack;
 	arch_spinlock_t			stage_lock;
 
 	/*
@@ -909,6 +910,7 @@ struct dept_task {
 	.stage_w_fn = NULL,					\
 	.stage_ip = 0UL,					\
 	.stage_timeout = false,					\
+	.stage_wait_stack = NULL,				\
 	.stage_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED,\
 	.missing_ecxt = 0,					\
 	.hardirqs_enabled = false,				\
diff --git a/kernel/dependency/dept.c b/kernel/dependency/dept.c
index f928c12111fe..1c6eb0a6d0a6 100644
--- a/kernel/dependency/dept.c
+++ b/kernel/dependency/dept.c
@@ -523,6 +523,7 @@ static void initialize_ecxt(struct dept_ecxt *e)
 	e->enirqf = 0UL;
 	e->event_ip = 0UL;
 	e->event_stack = NULL;
+	e->ewait_stack = NULL;
 }
 SET_CONSTRUCTOR(ecxt, initialize_ecxt);
 
@@ -578,6 +579,8 @@ static void destroy_ecxt(struct dept_ecxt *e)
 		put_stack(e->ecxt_stack);
 	if (e->event_stack)
 		put_stack(e->event_stack);
+	if (e->ewait_stack)
+		put_stack(e->ewait_stack);
 }
 SET_DESTRUCTOR(ecxt, destroy_ecxt);
 
@@ -794,6 +797,11 @@ static void print_dep(struct dept_dep *d)
 
 		pr_warn("[E] %s(%s:%d):\n", e_fn, fc_n, fc->sub_id);
 		print_ip_stack(e->event_ip, e->event_stack);
+
+		if (valid_stack(e->ewait_stack)) {
+			pr_warn("(wait to wake up)\n");
+			print_ip_stack(0, e->ewait_stack);
+		}
 	}
 
 	if (!irqf) {
@@ -807,6 +815,11 @@ static void print_dep(struct dept_dep *d)
 
 		pr_warn("[E] %s(%s:%d):\n", e_fn, fc_n, fc->sub_id);
 		print_ip_stack(e->event_ip, e->event_stack);
+
+		if (valid_stack(e->ewait_stack)) {
+			pr_warn("(wait to wake up)\n");
+			print_ip_stack(0, e->ewait_stack);
+		}
 	}
 }
 
@@ -1657,7 +1670,8 @@ static void add_wait(struct dept_class *c, unsigned long ip,
 
 static struct dept_ecxt_held *add_ecxt(struct dept_map *m,
 		struct dept_class *c, unsigned long ip, const char *c_fn,
-		const char *e_fn, int sub_l)
+		const char *e_fn, int sub_l,
+		struct dept_stack *ewait_stack)
 {
 	struct dept_task *dt = dept_task();
 	struct dept_ecxt_held *eh;
@@ -1691,6 +1705,7 @@ static struct dept_ecxt_held *add_ecxt(struct dept_map *m,
 	e->class = get_class(c);
 	e->ecxt_ip = ip;
 	e->ecxt_stack = ip ? get_current_stack() : NULL;
+	e->ewait_stack = ewait_stack ? get_stack(ewait_stack) : NULL;
 	e->event_fn = e_fn;
 	e->ecxt_fn = c_fn;
 
@@ -1797,7 +1812,7 @@ static int find_hist_pos(unsigned int wg)
 
 static void do_event(struct dept_map *m, struct dept_map *real_m,
 		struct dept_class *c, unsigned int wg, unsigned long ip,
-		const char *e_fn)
+		const char *e_fn, struct dept_stack *ewait_stack)
 {
 	struct dept_task *dt = dept_task();
 	struct dept_wait_hist *wh;
@@ -1825,7 +1840,7 @@ static void do_event(struct dept_map *m, struct dept_map *real_m,
 	 */
 	if (find_ecxt_pos(real_m, c, false) != -1)
 		return;
-	eh = add_ecxt(m, c, 0UL, NULL, e_fn, 0);
+	eh = add_ecxt(m, c, 0UL, NULL, e_fn, 0, ewait_stack);
 
 	if (!eh)
 		return;
@@ -2360,7 +2375,8 @@ static void __dept_wait(struct dept_map *m, unsigned long w_f,
  */
 static void __dept_event(struct dept_map *m, struct dept_map *real_m,
 		unsigned long e_f, unsigned long ip, const char *e_fn,
-		bool sched_map, unsigned int wg)
+		bool sched_map, unsigned int wg,
+		struct dept_stack *ewait_stack)
 {
 	struct dept_class *c;
 	struct dept_key *k;
@@ -2382,7 +2398,7 @@ static void __dept_event(struct dept_map *m, struct dept_map *real_m,
 	c = check_new_class(&m->map_key, k, sub_id(m, e), m->name, sched_map);
 
 	if (c)
-		do_event(m, real_m, c, wg, ip, e_fn);
+		do_event(m, real_m, c, wg, ip, e_fn, ewait_stack);
 }
 
 void dept_wait(struct dept_map *m, unsigned long w_f,
@@ -2498,6 +2514,9 @@ static void __dept_clean_stage(struct dept_task *dt)
 	dt->stage_w_fn = NULL;
 	dt->stage_ip = 0UL;
 	dt->stage_timeout = false;
+	if (dt->stage_wait_stack)
+		put_stack(dt->stage_wait_stack);
+	dt->stage_wait_stack = NULL;
 }
 
 void dept_clean_stage(void)
@@ -2561,6 +2580,14 @@ void dept_request_event_wait_commit(void)
 
 	wg = next_wgen();
 	WRITE_ONCE(dt->stage_m.wgen, wg);
+
+	/*
+	 * __schedule() can be hit multiple times between
+	 * dept_stage_wait() and dept_clean_stage().  In that case,
+	 * keep the first stacktrace only.  That's enough.
+	 */
+	if (!dt->stage_wait_stack)
+		dt->stage_wait_stack = get_current_stack();
 	arch_spin_unlock(&dt->stage_lock);
 
 	__dept_wait(&dt->stage_m, 1UL, ip, w_fn, 0, true, sched_map, timeout);
@@ -2579,6 +2606,7 @@ void dept_ttwu_stage_wait(struct task_struct *requestor, unsigned long ip)
 	struct dept_map m;
 	struct dept_map *real_m;
 	bool sched_map;
+	struct dept_stack *ewait_stack;
 
 	if (unlikely(!dept_working()))
 		return;
@@ -2597,6 +2625,10 @@ void dept_ttwu_stage_wait(struct task_struct *requestor, unsigned long ip)
 	m = dt_req->stage_m;
 	sched_map = dt_req->stage_sched_map;
 	real_m = dt_req->stage_real_m;
+	ewait_stack = dt_req->stage_wait_stack;
+	if (ewait_stack)
+		get_stack(ewait_stack);
+
 	__dept_clean_stage(dt_req);
 	arch_spin_unlock(&dt_req->stage_lock);
 
@@ -2607,8 +2639,12 @@ void dept_ttwu_stage_wait(struct task_struct *requestor, unsigned long ip)
 	if (!m.keys)
 		goto exit;
 
-	__dept_event(&m, real_m, 1UL, ip, "try_to_wake_up", sched_map, m.wgen);
+	__dept_event(&m, real_m, 1UL, ip, "try_to_wake_up", sched_map,
+			m.wgen, ewait_stack);
 exit:
+	if (ewait_stack)
+		put_stack(ewait_stack);
+
 	dept_exit(flags);
 }
 
@@ -2688,7 +2724,7 @@ void dept_map_ecxt_modify(struct dept_map *m, unsigned long e_f,
 	k = m->keys ?: &m->map_key;
 	c = check_new_class(&m->map_key, k, sub_id(m, new_e), m->name, false);
 
-	if (c && add_ecxt(m, c, new_ip, new_c_fn, new_e_fn, new_sub_l))
+	if (c && add_ecxt(m, c, new_ip, new_c_fn, new_e_fn, new_sub_l, NULL))
 		goto exit;
 
 	/*
@@ -2740,7 +2776,7 @@ void dept_ecxt_enter(struct dept_map *m, unsigned long e_f, unsigned long ip,
 	k = m->keys ?: &m->map_key;
 	c = check_new_class(&m->map_key, k, sub_id(m, e), m->name, false);
 
-	if (c && add_ecxt(m, c, ip, c_fn, e_fn, sub_l))
+	if (c && add_ecxt(m, c, ip, c_fn, e_fn, sub_l, NULL))
 		goto exit;
 missing_ecxt:
 	dt->missing_ecxt++;
@@ -2840,7 +2876,7 @@ void dept_event(struct dept_map *m, unsigned long e_f,
 
 	flags = dept_enter();
 
-	__dept_event(m, m, e_f, ip, e_fn, false, READ_ONCE(*wg_p));
+	__dept_event(m, m, e_f, ip, e_fn, false, READ_ONCE(*wg_p), NULL);
 
 	/*
 	 * Keep the map diabled until the next sleep.
@@ -2912,6 +2948,11 @@ void dept_task_exit(struct task_struct *t)
 		dt->stack = NULL;
 	}
 
+	if (dt->stage_wait_stack) {
+		put_stack(dt->stage_wait_stack);
+		dt->stage_wait_stack = NULL;
+	}
+
 	for (i = 0; i < dt->ecxt_held_pos; i++) {
 		if (dt->ecxt_held[i].class) {
 			put_class(dt->ecxt_held[i].class);
-- 
2.17.1


^ permalink raw reply related

* [PATCH v17 23/47] dept: apply timeout consideration to dma fence wait
From: Byungchul Park @ 2025-10-02  8:12 UTC (permalink / raw)
  To: linux-kernel
  Cc: kernel_team, torvalds, damien.lemoal, linux-ide, adilger.kernel,
	linux-ext4, mingo, peterz, will, tglx, rostedt, joel, sashal,
	daniel.vetter, duyuyang, johannes.berg, tj, tytso, willy, david,
	amir73il, gregkh, kernel-team, linux-mm, akpm, mhocko, minchan,
	hannes, vdavydov.dev, sj, jglisse, dennis, cl, penberg, rientjes,
	vbabka, ngupta, linux-block, josef, linux-fsdevel, jack, jlayton,
	dan.j.williams, hch, djwong, dri-devel, rodrigosiqueiramelo,
	melissa.srw, hamohammed.sa, harry.yoo, chris.p.wilson,
	gwan-gyeong.mun, max.byungchul.park, boqun.feng, longman,
	yunseong.kim, ysk, yeoreum.yun, netdev, matthew.brost, her0gyugyu,
	corbet, catalin.marinas, bp, dave.hansen, x86, hpa, luto,
	sumit.semwal, gustavo, christian.koenig, andi.shyti, arnd,
	lorenzo.stoakes, Liam.Howlett, rppt, surenb, mcgrof, petr.pavlu,
	da.gomez, samitolvanen, paulmck, frederic, neeraj.upadhyay,
	joelagnelf, josh, urezki, mathieu.desnoyers, jiangshanlai,
	qiang.zhang, juri.lelli, vincent.guittot, dietmar.eggemann,
	bsegall, mgorman, vschneid, chuck.lever, neil, okorniev, Dai.Ngo,
	tom, trondmy, anna, kees, bigeasy, clrkwllms, mark.rutland,
	ada.coupriediaz, kristina.martsenko, wangkefeng.wang, broonie,
	kevin.brodsky, dwmw, shakeel.butt, ast, ziy, yuzhao, baolin.wang,
	usamaarif642, joel.granados, richard.weiyang, geert+renesas,
	tim.c.chen, linux, alexander.shishkin, lillian, chenhuacai,
	francesco, guoweikang.kernel, link, jpoimboe, masahiroy, brauner,
	thomas.weissschuh, oleg, mjguzik, andrii, wangfushuai, linux-doc,
	linux-arm-kernel, linux-media, linaro-mm-sig, linux-i2c,
	linux-arch, linux-modules, rcu, linux-nfs, linux-rt-devel
In-Reply-To: <20251002081247.51255-1-byungchul@sk.com>

Now that CONFIG_DEPT_AGGRESSIVE_TIMEOUT_WAIT was introduced, apply the
consideration to dma fence wait.

Signed-off-by: Byungchul Park <byungchul@sk.com>
---
 drivers/dma-buf/dma-fence.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index 787fa10a49f1..0a4b519e3351 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -801,7 +801,7 @@ dma_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout)
 	cb.task = current;
 	list_add(&cb.base.node, &fence->cb_list);
 
-	sdt_might_sleep_start(NULL);
+	sdt_might_sleep_start_timeout(NULL, timeout);
 	while (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) && ret > 0) {
 		if (intr)
 			__set_current_state(TASK_INTERRUPTIBLE);
@@ -905,7 +905,7 @@ dma_fence_wait_any_timeout(struct dma_fence **fences, uint32_t count,
 		}
 	}
 
-	sdt_might_sleep_start(NULL);
+	sdt_might_sleep_start_timeout(NULL, timeout);
 	while (ret > 0) {
 		if (intr)
 			set_current_state(TASK_INTERRUPTIBLE);
-- 
2.17.1


^ permalink raw reply related

* [PATCH v17 24/47] dept: make dept able to work with an external wgen
From: Byungchul Park @ 2025-10-02  8:12 UTC (permalink / raw)
  To: linux-kernel
  Cc: kernel_team, torvalds, damien.lemoal, linux-ide, adilger.kernel,
	linux-ext4, mingo, peterz, will, tglx, rostedt, joel, sashal,
	daniel.vetter, duyuyang, johannes.berg, tj, tytso, willy, david,
	amir73il, gregkh, kernel-team, linux-mm, akpm, mhocko, minchan,
	hannes, vdavydov.dev, sj, jglisse, dennis, cl, penberg, rientjes,
	vbabka, ngupta, linux-block, josef, linux-fsdevel, jack, jlayton,
	dan.j.williams, hch, djwong, dri-devel, rodrigosiqueiramelo,
	melissa.srw, hamohammed.sa, harry.yoo, chris.p.wilson,
	gwan-gyeong.mun, max.byungchul.park, boqun.feng, longman,
	yunseong.kim, ysk, yeoreum.yun, netdev, matthew.brost, her0gyugyu,
	corbet, catalin.marinas, bp, dave.hansen, x86, hpa, luto,
	sumit.semwal, gustavo, christian.koenig, andi.shyti, arnd,
	lorenzo.stoakes, Liam.Howlett, rppt, surenb, mcgrof, petr.pavlu,
	da.gomez, samitolvanen, paulmck, frederic, neeraj.upadhyay,
	joelagnelf, josh, urezki, mathieu.desnoyers, jiangshanlai,
	qiang.zhang, juri.lelli, vincent.guittot, dietmar.eggemann,
	bsegall, mgorman, vschneid, chuck.lever, neil, okorniev, Dai.Ngo,
	tom, trondmy, anna, kees, bigeasy, clrkwllms, mark.rutland,
	ada.coupriediaz, kristina.martsenko, wangkefeng.wang, broonie,
	kevin.brodsky, dwmw, shakeel.butt, ast, ziy, yuzhao, baolin.wang,
	usamaarif642, joel.granados, richard.weiyang, geert+renesas,
	tim.c.chen, linux, alexander.shishkin, lillian, chenhuacai,
	francesco, guoweikang.kernel, link, jpoimboe, masahiroy, brauner,
	thomas.weissschuh, oleg, mjguzik, andrii, wangfushuai, linux-doc,
	linux-arm-kernel, linux-media, linaro-mm-sig, linux-i2c,
	linux-arch, linux-modules, rcu, linux-nfs, linux-rt-devel
In-Reply-To: <20251002081247.51255-1-byungchul@sk.com>

There is a case where the total map size of waits of a class is so large.
For instance, PG_locked is the case if every struct page embeds its
regular map for PG_locked.  The total size for the maps will be 'the #
of pages * sizeof(struct dept_map)', which is too big to accept.

Keep the minimum data in the case, timestamp called 'wgen', that dept
uses.  Make dept able to work with the wgen instead of whole regular map.

Signed-off-by: Byungchul Park <byungchul@sk.com>
---
 include/linux/dept.h     | 18 ++++++++++++++----
 include/linux/dept_sdt.h |  6 +++---
 kernel/dependency/dept.c | 30 +++++++++++++++++++++---------
 3 files changed, 38 insertions(+), 16 deletions(-)

diff --git a/include/linux/dept.h b/include/linux/dept.h
index 49f457390521..236e4f06e5c8 100644
--- a/include/linux/dept.h
+++ b/include/linux/dept.h
@@ -372,6 +372,13 @@ struct dept_wait_hist {
 	unsigned int			ctxt_id;
 };
 
+/*
+ * for subsystems that requires compact use of memory e.g. struct page
+ */
+struct dept_ext_wgen {
+	unsigned int wgen;
+};
+
 extern void dept_on(void);
 extern void dept_off(void);
 extern void dept_init(void);
@@ -381,6 +388,7 @@ extern void dept_free_range(void *start, unsigned int sz);
 
 extern void dept_map_init(struct dept_map *m, struct dept_key *k, int sub_u, const char *n);
 extern void dept_map_reinit(struct dept_map *m, struct dept_key *k, int sub_u, const char *n);
+extern void dept_ext_wgen_init(struct dept_ext_wgen *ewg);
 extern void dept_map_copy(struct dept_map *to, struct dept_map *from);
 extern void dept_wait(struct dept_map *m, unsigned long w_f, unsigned long ip, const char *w_fn, int sub_l, long timeout);
 extern void dept_stage_wait(struct dept_map *m, struct dept_key *k, unsigned long ip, const char *w_fn, long timeout);
@@ -389,8 +397,8 @@ extern void dept_clean_stage(void);
 extern void dept_ttwu_stage_wait(struct task_struct *t, unsigned long ip);
 extern void dept_ecxt_enter(struct dept_map *m, unsigned long e_f, unsigned long ip, const char *c_fn, const char *e_fn, int sub_l);
 extern bool dept_ecxt_holding(struct dept_map *m, unsigned long e_f);
-extern void dept_request_event(struct dept_map *m);
-extern void dept_event(struct dept_map *m, unsigned long e_f, unsigned long ip, const char *e_fn);
+extern void dept_request_event(struct dept_map *m, struct dept_ext_wgen *ewg);
+extern void dept_event(struct dept_map *m, unsigned long e_f, unsigned long ip, const char *e_fn, struct dept_ext_wgen *ewg);
 extern void dept_ecxt_exit(struct dept_map *m, unsigned long e_f, unsigned long ip);
 extern void dept_sched_enter(void);
 extern void dept_sched_exit(void);
@@ -417,6 +425,7 @@ extern void dept_hardirqs_off(void);
 #else /* !CONFIG_DEPT */
 struct dept_key { };
 struct dept_map { };
+struct dept_ext_wgen { };
 
 #define DEPT_MAP_INITIALIZER(n, k) { }
 
@@ -429,6 +438,7 @@ struct dept_map { };
 
 #define dept_map_init(m, k, su, n)			do { (void)(n); (void)(k); } while (0)
 #define dept_map_reinit(m, k, su, n)			do { (void)(n); (void)(k); } while (0)
+#define dept_ext_wgen_init(wg)				do { } while (0)
 #define dept_map_copy(t, f)				do { } while (0)
 #define dept_wait(m, w_f, ip, w_fn, sl, t)		do { (void)(w_fn); } while (0)
 #define dept_stage_wait(m, k, ip, w_fn, t)		do { (void)(k); (void)(w_fn); } while (0)
@@ -437,8 +447,8 @@ struct dept_map { };
 #define dept_ttwu_stage_wait(t, ip)			do { } while (0)
 #define dept_ecxt_enter(m, e_f, ip, c_fn, e_fn, sl)	do { (void)(c_fn); (void)(e_fn); } while (0)
 #define dept_ecxt_holding(m, e_f)			false
-#define dept_request_event(m)				do { } while (0)
-#define dept_event(m, e_f, ip, e_fn)			do { (void)(e_fn); } while (0)
+#define dept_request_event(m, wg)			do { } while (0)
+#define dept_event(m, e_f, ip, e_fn, wg)		do { (void)(e_fn); } while (0)
 #define dept_ecxt_exit(m, e_f, ip)			do { } while (0)
 #define dept_sched_enter()				do { } while (0)
 #define dept_sched_exit()				do { } while (0)
diff --git a/include/linux/dept_sdt.h b/include/linux/dept_sdt.h
index 14917df0cc30..9cd70affaf35 100644
--- a/include/linux/dept_sdt.h
+++ b/include/linux/dept_sdt.h
@@ -25,7 +25,7 @@
 
 #define sdt_wait_timeout(m, t)						\
 	do {								\
-		dept_request_event(m);					\
+		dept_request_event(m, NULL);				\
 		dept_wait(m, 1UL, _THIS_IP_, __func__, 0, t);		\
 	} while (0)
 #define sdt_wait(m) sdt_wait_timeout(m, -1L)
@@ -49,9 +49,9 @@
 #define sdt_might_sleep_end()		dept_clean_stage()
 
 #define sdt_ecxt_enter(m)		dept_ecxt_enter(m, 1UL, _THIS_IP_, "start", "event", 0)
-#define sdt_event(m)			dept_event(m, 1UL, _THIS_IP_, __func__)
+#define sdt_event(m)			dept_event(m, 1UL, _THIS_IP_, __func__, NULL)
 #define sdt_ecxt_exit(m)		dept_ecxt_exit(m, 1UL, _THIS_IP_)
-#define sdt_request_event(m)		dept_request_event(m)
+#define sdt_request_event(m)		dept_request_event(m, NULL)
 #else /* !CONFIG_DEPT */
 #define sdt_map_init(m)			do { } while (0)
 #define sdt_map_init_key(m, k)		do { (void)(k); } while (0)
diff --git a/kernel/dependency/dept.c b/kernel/dependency/dept.c
index 519b2151403e..f928c12111fe 100644
--- a/kernel/dependency/dept.c
+++ b/kernel/dependency/dept.c
@@ -2172,6 +2172,11 @@ void dept_map_reinit(struct dept_map *m, struct dept_key *k, int sub_u,
 }
 EXPORT_SYMBOL_GPL(dept_map_reinit);
 
+void dept_ext_wgen_init(struct dept_ext_wgen *ewg)
+{
+	ewg->wgen = 0U;
+}
+
 void dept_map_copy(struct dept_map *to, struct dept_map *from)
 {
 	if (unlikely(!dept_working())) {
@@ -2355,7 +2360,7 @@ static void __dept_wait(struct dept_map *m, unsigned long w_f,
  */
 static void __dept_event(struct dept_map *m, struct dept_map *real_m,
 		unsigned long e_f, unsigned long ip, const char *e_fn,
-		bool sched_map)
+		bool sched_map, unsigned int wg)
 {
 	struct dept_class *c;
 	struct dept_key *k;
@@ -2377,7 +2382,7 @@ static void __dept_event(struct dept_map *m, struct dept_map *real_m,
 	c = check_new_class(&m->map_key, k, sub_id(m, e), m->name, sched_map);
 
 	if (c)
-		do_event(m, real_m, c, READ_ONCE(m->wgen), ip, e_fn);
+		do_event(m, real_m, c, wg, ip, e_fn);
 }
 
 void dept_wait(struct dept_map *m, unsigned long w_f,
@@ -2602,7 +2607,7 @@ void dept_ttwu_stage_wait(struct task_struct *requestor, unsigned long ip)
 	if (!m.keys)
 		goto exit;
 
-	__dept_event(&m, real_m, 1UL, ip, "try_to_wake_up", sched_map);
+	__dept_event(&m, real_m, 1UL, ip, "try_to_wake_up", sched_map, m.wgen);
 exit:
 	dept_exit(flags);
 }
@@ -2781,10 +2786,11 @@ bool dept_ecxt_holding(struct dept_map *m, unsigned long e_f)
 }
 EXPORT_SYMBOL_GPL(dept_ecxt_holding);
 
-void dept_request_event(struct dept_map *m)
+void dept_request_event(struct dept_map *m, struct dept_ext_wgen *ewg)
 {
 	unsigned long flags;
 	unsigned int wg;
+	unsigned int *wg_p;
 
 	if (unlikely(!dept_working()))
 		return;
@@ -2797,18 +2803,22 @@ void dept_request_event(struct dept_map *m)
 	 */
 	flags = dept_enter_recursive();
 
+	wg_p = ewg ? &ewg->wgen : &m->wgen;
+
 	wg = next_wgen();
-	WRITE_ONCE(m->wgen, wg);
+	WRITE_ONCE(*wg_p, wg);
 
 	dept_exit_recursive(flags);
 }
 EXPORT_SYMBOL_GPL(dept_request_event);
 
 void dept_event(struct dept_map *m, unsigned long e_f,
-		unsigned long ip, const char *e_fn)
+		unsigned long ip, const char *e_fn,
+		struct dept_ext_wgen *ewg)
 {
 	struct dept_task *dt = dept_task();
 	unsigned long flags;
+	unsigned int *wg_p;
 
 	if (unlikely(!dept_working()))
 		return;
@@ -2816,24 +2826,26 @@ void dept_event(struct dept_map *m, unsigned long e_f,
 	if (m->nocheck)
 		return;
 
+	wg_p = ewg ? &ewg->wgen : &m->wgen;
+
 	if (dt->recursive) {
 		/*
 		 * Dept won't work with this even though an event
 		 * context has been asked. Don't make it confused at
 		 * handling the event. Disable it until the next.
 		 */
-		WRITE_ONCE(m->wgen, 0U);
+		WRITE_ONCE(*wg_p, 0U);
 		return;
 	}
 
 	flags = dept_enter();
 
-	__dept_event(m, m, e_f, ip, e_fn, false);
+	__dept_event(m, m, e_f, ip, e_fn, false, READ_ONCE(*wg_p));
 
 	/*
 	 * Keep the map diabled until the next sleep.
 	 */
-	WRITE_ONCE(m->wgen, 0U);
+	WRITE_ONCE(*wg_p, 0U);
 
 	dept_exit(flags);
 }
-- 
2.17.1


^ permalink raw reply related

* [PATCH v17 22/47] dept: apply timeout consideration to hashed-waitqueue wait
From: Byungchul Park @ 2025-10-02  8:12 UTC (permalink / raw)
  To: linux-kernel
  Cc: kernel_team, torvalds, damien.lemoal, linux-ide, adilger.kernel,
	linux-ext4, mingo, peterz, will, tglx, rostedt, joel, sashal,
	daniel.vetter, duyuyang, johannes.berg, tj, tytso, willy, david,
	amir73il, gregkh, kernel-team, linux-mm, akpm, mhocko, minchan,
	hannes, vdavydov.dev, sj, jglisse, dennis, cl, penberg, rientjes,
	vbabka, ngupta, linux-block, josef, linux-fsdevel, jack, jlayton,
	dan.j.williams, hch, djwong, dri-devel, rodrigosiqueiramelo,
	melissa.srw, hamohammed.sa, harry.yoo, chris.p.wilson,
	gwan-gyeong.mun, max.byungchul.park, boqun.feng, longman,
	yunseong.kim, ysk, yeoreum.yun, netdev, matthew.brost, her0gyugyu,
	corbet, catalin.marinas, bp, dave.hansen, x86, hpa, luto,
	sumit.semwal, gustavo, christian.koenig, andi.shyti, arnd,
	lorenzo.stoakes, Liam.Howlett, rppt, surenb, mcgrof, petr.pavlu,
	da.gomez, samitolvanen, paulmck, frederic, neeraj.upadhyay,
	joelagnelf, josh, urezki, mathieu.desnoyers, jiangshanlai,
	qiang.zhang, juri.lelli, vincent.guittot, dietmar.eggemann,
	bsegall, mgorman, vschneid, chuck.lever, neil, okorniev, Dai.Ngo,
	tom, trondmy, anna, kees, bigeasy, clrkwllms, mark.rutland,
	ada.coupriediaz, kristina.martsenko, wangkefeng.wang, broonie,
	kevin.brodsky, dwmw, shakeel.butt, ast, ziy, yuzhao, baolin.wang,
	usamaarif642, joel.granados, richard.weiyang, geert+renesas,
	tim.c.chen, linux, alexander.shishkin, lillian, chenhuacai,
	francesco, guoweikang.kernel, link, jpoimboe, masahiroy, brauner,
	thomas.weissschuh, oleg, mjguzik, andrii, wangfushuai, linux-doc,
	linux-arm-kernel, linux-media, linaro-mm-sig, linux-i2c,
	linux-arch, linux-modules, rcu, linux-nfs, linux-rt-devel
In-Reply-To: <20251002081247.51255-1-byungchul@sk.com>

Now that CONFIG_DEPT_AGGRESSIVE_TIMEOUT_WAIT was introduced, apply the
consideration to hashed-waitqueue wait, assuming an input 'ret' in
___wait_var_event() macro is used as a timeout value.

Signed-off-by: Byungchul Park <byungchul@sk.com>
---
 include/linux/wait_bit.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h
index 179a616ad245..9885ac4e1ded 100644
--- a/include/linux/wait_bit.h
+++ b/include/linux/wait_bit.h
@@ -258,7 +258,7 @@ extern wait_queue_head_t *__var_waitqueue(void *p);
 	struct wait_bit_queue_entry __wbq_entry;			\
 	long __ret = ret; /* explicit shadow */				\
 									\
-	sdt_might_sleep_start(NULL);					\
+	sdt_might_sleep_start_timeout(NULL, __ret);			\
 	init_wait_var_entry(&__wbq_entry, var,				\
 			    exclusive ? WQ_FLAG_EXCLUSIVE : 0);		\
 	for (;;) {							\
-- 
2.17.1


^ permalink raw reply related

* [PATCH v17 21/47] dept: apply timeout consideration to waitqueue wait
From: Byungchul Park @ 2025-10-02  8:12 UTC (permalink / raw)
  To: linux-kernel
  Cc: kernel_team, torvalds, damien.lemoal, linux-ide, adilger.kernel,
	linux-ext4, mingo, peterz, will, tglx, rostedt, joel, sashal,
	daniel.vetter, duyuyang, johannes.berg, tj, tytso, willy, david,
	amir73il, gregkh, kernel-team, linux-mm, akpm, mhocko, minchan,
	hannes, vdavydov.dev, sj, jglisse, dennis, cl, penberg, rientjes,
	vbabka, ngupta, linux-block, josef, linux-fsdevel, jack, jlayton,
	dan.j.williams, hch, djwong, dri-devel, rodrigosiqueiramelo,
	melissa.srw, hamohammed.sa, harry.yoo, chris.p.wilson,
	gwan-gyeong.mun, max.byungchul.park, boqun.feng, longman,
	yunseong.kim, ysk, yeoreum.yun, netdev, matthew.brost, her0gyugyu,
	corbet, catalin.marinas, bp, dave.hansen, x86, hpa, luto,
	sumit.semwal, gustavo, christian.koenig, andi.shyti, arnd,
	lorenzo.stoakes, Liam.Howlett, rppt, surenb, mcgrof, petr.pavlu,
	da.gomez, samitolvanen, paulmck, frederic, neeraj.upadhyay,
	joelagnelf, josh, urezki, mathieu.desnoyers, jiangshanlai,
	qiang.zhang, juri.lelli, vincent.guittot, dietmar.eggemann,
	bsegall, mgorman, vschneid, chuck.lever, neil, okorniev, Dai.Ngo,
	tom, trondmy, anna, kees, bigeasy, clrkwllms, mark.rutland,
	ada.coupriediaz, kristina.martsenko, wangkefeng.wang, broonie,
	kevin.brodsky, dwmw, shakeel.butt, ast, ziy, yuzhao, baolin.wang,
	usamaarif642, joel.granados, richard.weiyang, geert+renesas,
	tim.c.chen, linux, alexander.shishkin, lillian, chenhuacai,
	francesco, guoweikang.kernel, link, jpoimboe, masahiroy, brauner,
	thomas.weissschuh, oleg, mjguzik, andrii, wangfushuai, linux-doc,
	linux-arm-kernel, linux-media, linaro-mm-sig, linux-i2c,
	linux-arch, linux-modules, rcu, linux-nfs, linux-rt-devel
In-Reply-To: <20251002081247.51255-1-byungchul@sk.com>

Now that CONFIG_DEPT_AGGRESSIVE_TIMEOUT_WAIT was introduced, apply the
consideration to waitqueue wait, assuming an input 'ret' in
___wait_event() macro is used as a timeout value.

Signed-off-by: Byungchul Park <byungchul@sk.com>
---
 include/linux/wait.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 65dd50f25e54..902a2e5381db 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -306,7 +306,7 @@ extern void init_wait_entry(struct wait_queue_entry *wq_entry, int flags);
 	struct wait_queue_entry __wq_entry;					\
 	long __ret = ret;	/* explicit shadow */				\
 										\
-	sdt_might_sleep_start(NULL);						\
+	sdt_might_sleep_start_timeout(NULL, __ret);				\
 	init_wait_entry(&__wq_entry, exclusive ? WQ_FLAG_EXCLUSIVE : 0);	\
 	for (;;) {								\
 		long __int = prepare_to_wait_event(&wq_head, &__wq_entry, state);\
-- 
2.17.1


^ permalink raw reply related

* [PATCH v17 20/47] dept: apply timeout consideration to swait
From: Byungchul Park @ 2025-10-02  8:12 UTC (permalink / raw)
  To: linux-kernel
  Cc: kernel_team, torvalds, damien.lemoal, linux-ide, adilger.kernel,
	linux-ext4, mingo, peterz, will, tglx, rostedt, joel, sashal,
	daniel.vetter, duyuyang, johannes.berg, tj, tytso, willy, david,
	amir73il, gregkh, kernel-team, linux-mm, akpm, mhocko, minchan,
	hannes, vdavydov.dev, sj, jglisse, dennis, cl, penberg, rientjes,
	vbabka, ngupta, linux-block, josef, linux-fsdevel, jack, jlayton,
	dan.j.williams, hch, djwong, dri-devel, rodrigosiqueiramelo,
	melissa.srw, hamohammed.sa, harry.yoo, chris.p.wilson,
	gwan-gyeong.mun, max.byungchul.park, boqun.feng, longman,
	yunseong.kim, ysk, yeoreum.yun, netdev, matthew.brost, her0gyugyu,
	corbet, catalin.marinas, bp, dave.hansen, x86, hpa, luto,
	sumit.semwal, gustavo, christian.koenig, andi.shyti, arnd,
	lorenzo.stoakes, Liam.Howlett, rppt, surenb, mcgrof, petr.pavlu,
	da.gomez, samitolvanen, paulmck, frederic, neeraj.upadhyay,
	joelagnelf, josh, urezki, mathieu.desnoyers, jiangshanlai,
	qiang.zhang, juri.lelli, vincent.guittot, dietmar.eggemann,
	bsegall, mgorman, vschneid, chuck.lever, neil, okorniev, Dai.Ngo,
	tom, trondmy, anna, kees, bigeasy, clrkwllms, mark.rutland,
	ada.coupriediaz, kristina.martsenko, wangkefeng.wang, broonie,
	kevin.brodsky, dwmw, shakeel.butt, ast, ziy, yuzhao, baolin.wang,
	usamaarif642, joel.granados, richard.weiyang, geert+renesas,
	tim.c.chen, linux, alexander.shishkin, lillian, chenhuacai,
	francesco, guoweikang.kernel, link, jpoimboe, masahiroy, brauner,
	thomas.weissschuh, oleg, mjguzik, andrii, wangfushuai, linux-doc,
	linux-arm-kernel, linux-media, linaro-mm-sig, linux-i2c,
	linux-arch, linux-modules, rcu, linux-nfs, linux-rt-devel
In-Reply-To: <20251002081247.51255-1-byungchul@sk.com>

Now that CONFIG_DEPT_AGGRESSIVE_TIMEOUT_WAIT was introduced, apply the
consideration to swait, assuming an input 'ret' in ___swait_event()
macro is used as a timeout value.

Signed-off-by: Byungchul Park <byungchul@sk.com>
---
 include/linux/swait.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/swait.h b/include/linux/swait.h
index 277ac74f61c3..233acdf55e9b 100644
--- a/include/linux/swait.h
+++ b/include/linux/swait.h
@@ -162,7 +162,7 @@ extern void finish_swait(struct swait_queue_head *q, struct swait_queue *wait);
 	struct swait_queue __wait;					\
 	long __ret = ret;						\
 									\
-	sdt_might_sleep_start(NULL);					\
+	sdt_might_sleep_start_timeout(NULL, __ret);			\
 	INIT_LIST_HEAD(&__wait.task_list);				\
 	for (;;) {							\
 		long __int = prepare_to_swait_event(&wq, &__wait, state);\
-- 
2.17.1


^ permalink raw reply related

* [PATCH v17 19/47] dept: apply timeout consideration to wait_for_completion()/complete()
From: Byungchul Park @ 2025-10-02  8:12 UTC (permalink / raw)
  To: linux-kernel
  Cc: kernel_team, torvalds, damien.lemoal, linux-ide, adilger.kernel,
	linux-ext4, mingo, peterz, will, tglx, rostedt, joel, sashal,
	daniel.vetter, duyuyang, johannes.berg, tj, tytso, willy, david,
	amir73il, gregkh, kernel-team, linux-mm, akpm, mhocko, minchan,
	hannes, vdavydov.dev, sj, jglisse, dennis, cl, penberg, rientjes,
	vbabka, ngupta, linux-block, josef, linux-fsdevel, jack, jlayton,
	dan.j.williams, hch, djwong, dri-devel, rodrigosiqueiramelo,
	melissa.srw, hamohammed.sa, harry.yoo, chris.p.wilson,
	gwan-gyeong.mun, max.byungchul.park, boqun.feng, longman,
	yunseong.kim, ysk, yeoreum.yun, netdev, matthew.brost, her0gyugyu,
	corbet, catalin.marinas, bp, dave.hansen, x86, hpa, luto,
	sumit.semwal, gustavo, christian.koenig, andi.shyti, arnd,
	lorenzo.stoakes, Liam.Howlett, rppt, surenb, mcgrof, petr.pavlu,
	da.gomez, samitolvanen, paulmck, frederic, neeraj.upadhyay,
	joelagnelf, josh, urezki, mathieu.desnoyers, jiangshanlai,
	qiang.zhang, juri.lelli, vincent.guittot, dietmar.eggemann,
	bsegall, mgorman, vschneid, chuck.lever, neil, okorniev, Dai.Ngo,
	tom, trondmy, anna, kees, bigeasy, clrkwllms, mark.rutland,
	ada.coupriediaz, kristina.martsenko, wangkefeng.wang, broonie,
	kevin.brodsky, dwmw, shakeel.butt, ast, ziy, yuzhao, baolin.wang,
	usamaarif642, joel.granados, richard.weiyang, geert+renesas,
	tim.c.chen, linux, alexander.shishkin, lillian, chenhuacai,
	francesco, guoweikang.kernel, link, jpoimboe, masahiroy, brauner,
	thomas.weissschuh, oleg, mjguzik, andrii, wangfushuai, linux-doc,
	linux-arm-kernel, linux-media, linaro-mm-sig, linux-i2c,
	linux-arch, linux-modules, rcu, linux-nfs, linux-rt-devel
In-Reply-To: <20251002081247.51255-1-byungchul@sk.com>

Now that CONFIG_DEPT_AGGRESSIVE_TIMEOUT_WAIT was introduced, apply the
consideration to wait_for_completion()/complete().

Signed-off-by: Byungchul Park <byungchul@sk.com>
---
 include/linux/completion.h | 4 ++--
 kernel/sched/completion.c  | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/linux/completion.h b/include/linux/completion.h
index bd2c207481d6..3200b741de28 100644
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -41,9 +41,9 @@ do {							\
  */
 #define init_completion_map(x, m) init_completion(x)
 
-static inline void complete_acquire(struct completion *x)
+static inline void complete_acquire(struct completion *x, long timeout)
 {
-	sdt_might_sleep_start(&x->dmap);
+	sdt_might_sleep_start_timeout(&x->dmap, timeout);
 }
 
 static inline void complete_release(struct completion *x)
diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c
index 19ee702273c0..5e45a60ff7b3 100644
--- a/kernel/sched/completion.c
+++ b/kernel/sched/completion.c
@@ -115,7 +115,7 @@ __wait_for_common(struct completion *x,
 {
 	might_sleep();
 
-	complete_acquire(x);
+	complete_acquire(x, timeout);
 
 	raw_spin_lock_irq(&x->wait.lock);
 	timeout = do_wait_for_common(x, action, timeout, state);
-- 
2.17.1


^ permalink raw reply related

* [PATCH v17 18/47] dept: track timeout waits separately with a new Kconfig
From: Byungchul Park @ 2025-10-02  8:12 UTC (permalink / raw)
  To: linux-kernel
  Cc: kernel_team, torvalds, damien.lemoal, linux-ide, adilger.kernel,
	linux-ext4, mingo, peterz, will, tglx, rostedt, joel, sashal,
	daniel.vetter, duyuyang, johannes.berg, tj, tytso, willy, david,
	amir73il, gregkh, kernel-team, linux-mm, akpm, mhocko, minchan,
	hannes, vdavydov.dev, sj, jglisse, dennis, cl, penberg, rientjes,
	vbabka, ngupta, linux-block, josef, linux-fsdevel, jack, jlayton,
	dan.j.williams, hch, djwong, dri-devel, rodrigosiqueiramelo,
	melissa.srw, hamohammed.sa, harry.yoo, chris.p.wilson,
	gwan-gyeong.mun, max.byungchul.park, boqun.feng, longman,
	yunseong.kim, ysk, yeoreum.yun, netdev, matthew.brost, her0gyugyu,
	corbet, catalin.marinas, bp, dave.hansen, x86, hpa, luto,
	sumit.semwal, gustavo, christian.koenig, andi.shyti, arnd,
	lorenzo.stoakes, Liam.Howlett, rppt, surenb, mcgrof, petr.pavlu,
	da.gomez, samitolvanen, paulmck, frederic, neeraj.upadhyay,
	joelagnelf, josh, urezki, mathieu.desnoyers, jiangshanlai,
	qiang.zhang, juri.lelli, vincent.guittot, dietmar.eggemann,
	bsegall, mgorman, vschneid, chuck.lever, neil, okorniev, Dai.Ngo,
	tom, trondmy, anna, kees, bigeasy, clrkwllms, mark.rutland,
	ada.coupriediaz, kristina.martsenko, wangkefeng.wang, broonie,
	kevin.brodsky, dwmw, shakeel.butt, ast, ziy, yuzhao, baolin.wang,
	usamaarif642, joel.granados, richard.weiyang, geert+renesas,
	tim.c.chen, linux, alexander.shishkin, lillian, chenhuacai,
	francesco, guoweikang.kernel, link, jpoimboe, masahiroy, brauner,
	thomas.weissschuh, oleg, mjguzik, andrii, wangfushuai, linux-doc,
	linux-arm-kernel, linux-media, linaro-mm-sig, linux-i2c,
	linux-arch, linux-modules, rcu, linux-nfs, linux-rt-devel
In-Reply-To: <20251002081247.51255-1-byungchul@sk.com>

Waits with valid timeouts don't actually cause deadlocks.  However, dept
has been reporting the cases as well because it's worth informing the
circular dependency for some cases where, for example, timeout is used
to avoid a deadlock.

However, yes, there are also a lot of, even more, cases where timeout
is used for its clear purpose and meant to be expired.

Report these as an information rather than warning DEADLOCK.  Plus,
introduce CONFIG_DEPT_AGGRESSIVE_TIMEOUT_WAIT Kconfig to make it
optional so that any reports involving waits with timeouts can be turned
on/off depending on the purpose.

Signed-off-by: Byungchul Park <byungchul@sk.com>
---
 include/linux/dept.h     | 13 +++++---
 include/linux/dept_ldt.h |  6 ++--
 include/linux/dept_sdt.h | 13 +++++---
 include/linux/sched.h    |  2 ++
 kernel/dependency/dept.c | 66 ++++++++++++++++++++++++++++++++++------
 lib/Kconfig.debug        | 10 ++++++
 6 files changed, 89 insertions(+), 21 deletions(-)

diff --git a/include/linux/dept.h b/include/linux/dept.h
index cb1b1beea077..49f457390521 100644
--- a/include/linux/dept.h
+++ b/include/linux/dept.h
@@ -271,6 +271,11 @@ struct dept_wait {
 			 * whether this wait is for commit in scheduler
 			 */
 			bool		sched_sleep;
+
+			/*
+			 * whether a timeout is set
+			 */
+			bool				timeout;
 		};
 	};
 };
@@ -377,8 +382,8 @@ extern void dept_free_range(void *start, unsigned int sz);
 extern void dept_map_init(struct dept_map *m, struct dept_key *k, int sub_u, const char *n);
 extern void dept_map_reinit(struct dept_map *m, struct dept_key *k, int sub_u, const char *n);
 extern void dept_map_copy(struct dept_map *to, struct dept_map *from);
-extern void dept_wait(struct dept_map *m, unsigned long w_f, unsigned long ip, const char *w_fn, int sub_l);
-extern void dept_stage_wait(struct dept_map *m, struct dept_key *k, unsigned long ip, const char *w_fn);
+extern void dept_wait(struct dept_map *m, unsigned long w_f, unsigned long ip, const char *w_fn, int sub_l, long timeout);
+extern void dept_stage_wait(struct dept_map *m, struct dept_key *k, unsigned long ip, const char *w_fn, long timeout);
 extern void dept_request_event_wait_commit(void);
 extern void dept_clean_stage(void);
 extern void dept_ttwu_stage_wait(struct task_struct *t, unsigned long ip);
@@ -425,8 +430,8 @@ struct dept_map { };
 #define dept_map_init(m, k, su, n)			do { (void)(n); (void)(k); } while (0)
 #define dept_map_reinit(m, k, su, n)			do { (void)(n); (void)(k); } while (0)
 #define dept_map_copy(t, f)				do { } while (0)
-#define dept_wait(m, w_f, ip, w_fn, sl)			do { (void)(w_fn); } while (0)
-#define dept_stage_wait(m, k, ip, w_fn)			do { (void)(k); (void)(w_fn); } while (0)
+#define dept_wait(m, w_f, ip, w_fn, sl, t)		do { (void)(w_fn); } while (0)
+#define dept_stage_wait(m, k, ip, w_fn, t)		do { (void)(k); (void)(w_fn); } while (0)
 #define dept_request_event_wait_commit()		do { } while (0)
 #define dept_clean_stage()				do { } while (0)
 #define dept_ttwu_stage_wait(t, ip)			do { } while (0)
diff --git a/include/linux/dept_ldt.h b/include/linux/dept_ldt.h
index 8047d0a531f1..730af2517ecd 100644
--- a/include/linux/dept_ldt.h
+++ b/include/linux/dept_ldt.h
@@ -28,7 +28,7 @@
 		else if (t)						\
 			dept_ecxt_enter(m, LDT_EVT_L, i, "trylock", "unlock", sl);\
 		else {							\
-			dept_wait(m, LDT_EVT_L, i, "lock", sl);		\
+			dept_wait(m, LDT_EVT_L, i, "lock", sl, false);	\
 			dept_ecxt_enter(m, LDT_EVT_L, i, "lock", "unlock", sl);\
 		}							\
 	} while (0)
@@ -40,7 +40,7 @@
 		else if (t)						\
 			dept_ecxt_enter(m, LDT_EVT_R, i, "read_trylock", "read_unlock", sl);\
 		else {							\
-			dept_wait(m, q ? LDT_EVT_RW : LDT_EVT_W, i, "read_lock", sl);\
+			dept_wait(m, q ? LDT_EVT_RW : LDT_EVT_W, i, "read_lock", sl, false);\
 			dept_ecxt_enter(m, LDT_EVT_R, i, "read_lock", "read_unlock", sl);\
 		}							\
 	} while (0)
@@ -52,7 +52,7 @@
 		else if (t)						\
 			dept_ecxt_enter(m, LDT_EVT_W, i, "write_trylock", "write_unlock", sl);\
 		else {							\
-			dept_wait(m, LDT_EVT_RW, i, "write_lock", sl);	\
+			dept_wait(m, LDT_EVT_RW, i, "write_lock", sl, false);\
 			dept_ecxt_enter(m, LDT_EVT_W, i, "write_lock", "write_unlock", sl);\
 		}							\
 	} while (0)
diff --git a/include/linux/dept_sdt.h b/include/linux/dept_sdt.h
index 0535f763b21b..14917df0cc30 100644
--- a/include/linux/dept_sdt.h
+++ b/include/linux/dept_sdt.h
@@ -23,11 +23,12 @@
 
 #define sdt_map_init_key(m, k)		dept_map_init(m, k, 0, #m)
 
-#define sdt_wait(m)							\
+#define sdt_wait_timeout(m, t)						\
 	do {								\
 		dept_request_event(m);					\
-		dept_wait(m, 1UL, _THIS_IP_, __func__, 0);		\
+		dept_wait(m, 1UL, _THIS_IP_, __func__, 0, t);		\
 	} while (0)
+#define sdt_wait(m) sdt_wait_timeout(m, -1L)
 
 /*
  * sdt_might_sleep() and its family will be committed in __schedule()
@@ -38,13 +39,13 @@
 /*
  * Use the code location as the class key if an explicit map is not used.
  */
-#define sdt_might_sleep_start(m)					\
+#define sdt_might_sleep_start_timeout(m, t)				\
 	do {								\
 		struct dept_map *__m = m;				\
 		static struct dept_key __key;				\
-		dept_stage_wait(__m, __m ? NULL : &__key, _THIS_IP_, __func__);\
+		dept_stage_wait(__m, __m ? NULL : &__key, _THIS_IP_, __func__, t);\
 	} while (0)
-
+#define sdt_might_sleep_start(m)	sdt_might_sleep_start_timeout(m, -1L)
 #define sdt_might_sleep_end()		dept_clean_stage()
 
 #define sdt_ecxt_enter(m)		dept_ecxt_enter(m, 1UL, _THIS_IP_, "start", "event", 0)
@@ -54,7 +55,9 @@
 #else /* !CONFIG_DEPT */
 #define sdt_map_init(m)			do { } while (0)
 #define sdt_map_init_key(m, k)		do { (void)(k); } while (0)
+#define sdt_wait_timeout(m, t)		do { } while (0)
 #define sdt_wait(m)			do { } while (0)
+#define sdt_might_sleep_start_timeout(m, t) do { } while (0)
 #define sdt_might_sleep_start(m)	do { } while (0)
 #define sdt_might_sleep_end()		do { } while (0)
 #define sdt_ecxt_enter(m)		do { } while (0)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 05c3f8a45405..95b1450f22fa 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -867,6 +867,7 @@ struct dept_task {
 	bool				stage_sched_map;
 	const char			*stage_w_fn;
 	unsigned long			stage_ip;
+	bool				stage_timeout;
 	arch_spinlock_t			stage_lock;
 
 	/*
@@ -907,6 +908,7 @@ struct dept_task {
 	.stage_sched_map = false,				\
 	.stage_w_fn = NULL,					\
 	.stage_ip = 0UL,					\
+	.stage_timeout = false,					\
 	.stage_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED,\
 	.missing_ecxt = 0,					\
 	.hardirqs_enabled = false,				\
diff --git a/kernel/dependency/dept.c b/kernel/dependency/dept.c
index f4c08758f8db..519b2151403e 100644
--- a/kernel/dependency/dept.c
+++ b/kernel/dependency/dept.c
@@ -757,6 +757,8 @@ static void print_diagram(struct dept_dep *d)
 	if (!irqf) {
 		print_spc(spc, "[S] %s(%s:%d)\n", c_fn, fc_n, fc->sub_id);
 		print_spc(spc, "[W] %s(%s:%d)\n", w_fn, tc_n, tc->sub_id);
+		if (w->timeout)
+			print_spc(spc, "--------------- >8 timeout ---------------\n");
 		print_spc(spc, "[E] %s(%s:%d)\n", e_fn, fc_n, fc->sub_id);
 	}
 }
@@ -810,6 +812,24 @@ static void print_dep(struct dept_dep *d)
 
 static void save_current_stack(int skip);
 
+static bool is_timeout_wait_circle(struct dept_class *c)
+{
+	struct dept_class *fc = c->bfs_parent;
+	struct dept_class *tc = c;
+
+	do {
+		struct dept_dep *d = lookup_dep(fc, tc);
+
+		if (d->wait->timeout)
+			return true;
+
+		tc = fc;
+		fc = fc->bfs_parent;
+	} while (tc != c);
+
+	return false;
+}
+
 /*
  * Print all classes in a circle.
  */
@@ -832,10 +852,14 @@ static void print_circle(struct dept_class *c)
 	pr_warn("summary\n");
 	pr_warn("---------------------------------------------------\n");
 
-	if (fc == tc)
+	if (is_timeout_wait_circle(c)) {
+		pr_warn("NOT A DEADLOCK BUT A CIRCULAR DEPENDENCY\n");
+		pr_warn("CHECK IF THE TIMEOUT IS INTENDED\n\n");
+	} else if (fc == tc) {
 		pr_warn("*** AA DEADLOCK ***\n\n");
-	else
+	} else {
 		pr_warn("*** DEADLOCK ***\n\n");
+	}
 
 	i = 0;
 	do {
@@ -1579,7 +1603,8 @@ static int next_wgen(void)
 }
 
 static void add_wait(struct dept_class *c, unsigned long ip,
-		     const char *w_fn, int sub_l, bool sched_sleep)
+		     const char *w_fn, int sub_l, bool sched_sleep,
+		     bool timeout)
 {
 	struct dept_task *dt = dept_task();
 	struct dept_wait *w;
@@ -1599,6 +1624,7 @@ static void add_wait(struct dept_class *c, unsigned long ip,
 	w->wait_fn = w_fn;
 	w->wait_stack = get_current_stack();
 	w->sched_sleep = sched_sleep;
+	w->timeout = timeout;
 
 	cxt = cur_cxt();
 	if (cxt == DEPT_CXT_HIRQ || cxt == DEPT_CXT_SIRQ)
@@ -2297,7 +2323,7 @@ static struct dept_class *check_new_class(struct dept_key *local,
  */
 static void __dept_wait(struct dept_map *m, unsigned long w_f,
 			unsigned long ip, const char *w_fn, int sub_l,
-			bool sched_sleep, bool sched_map)
+			bool sched_sleep, bool sched_map, bool timeout)
 {
 	int e;
 
@@ -2320,7 +2346,7 @@ static void __dept_wait(struct dept_map *m, unsigned long w_f,
 		if (!c)
 			continue;
 
-		add_wait(c, ip, w_fn, sub_l, sched_sleep);
+		add_wait(c, ip, w_fn, sub_l, sched_sleep, timeout);
 	}
 }
 
@@ -2355,14 +2381,23 @@ static void __dept_event(struct dept_map *m, struct dept_map *real_m,
 }
 
 void dept_wait(struct dept_map *m, unsigned long w_f,
-	       unsigned long ip, const char *w_fn, int sub_l)
+	       unsigned long ip, const char *w_fn, int sub_l,
+	       long timeoutval)
 {
 	struct dept_task *dt = dept_task();
 	unsigned long flags;
+	bool timeout;
 
 	if (unlikely(!dept_working()))
 		return;
 
+	timeout = timeoutval > 0 && timeoutval < MAX_SCHEDULE_TIMEOUT;
+
+#if !defined(CONFIG_DEPT_AGGRESSIVE_TIMEOUT_WAIT)
+	if (timeout)
+		return;
+#endif
+
 	if (dt->recursive)
 		return;
 
@@ -2371,21 +2406,30 @@ void dept_wait(struct dept_map *m, unsigned long w_f,
 
 	flags = dept_enter();
 
-	__dept_wait(m, w_f, ip, w_fn, sub_l, false, false);
+	__dept_wait(m, w_f, ip, w_fn, sub_l, false, false, timeout);
 
 	dept_exit(flags);
 }
 EXPORT_SYMBOL_GPL(dept_wait);
 
 void dept_stage_wait(struct dept_map *m, struct dept_key *k,
-		     unsigned long ip, const char *w_fn)
+		     unsigned long ip, const char *w_fn,
+		     long timeoutval)
 {
 	struct dept_task *dt = dept_task();
 	unsigned long flags;
+	bool timeout;
 
 	if (unlikely(!dept_working()))
 		return;
 
+	timeout = timeoutval > 0 && timeoutval < MAX_SCHEDULE_TIMEOUT;
+
+#if !defined(CONFIG_DEPT_AGGRESSIVE_TIMEOUT_WAIT)
+	if (timeout)
+		return;
+#endif
+
 	if (m && m->nocheck)
 		return;
 
@@ -2434,6 +2478,7 @@ void dept_stage_wait(struct dept_map *m, struct dept_key *k,
 
 	dt->stage_w_fn = w_fn;
 	dt->stage_ip = ip;
+	dt->stage_timeout = timeout;
 	arch_spin_unlock(&dt->stage_lock);
 exit:
 	dept_exit_recursive(flags);
@@ -2447,6 +2492,7 @@ static void __dept_clean_stage(struct dept_task *dt)
 	dt->stage_sched_map = false;
 	dt->stage_w_fn = NULL;
 	dt->stage_ip = 0UL;
+	dt->stage_timeout = false;
 }
 
 void dept_clean_stage(void)
@@ -2479,6 +2525,7 @@ void dept_request_event_wait_commit(void)
 	unsigned long ip;
 	const char *w_fn;
 	bool sched_map;
+	bool timeout;
 
 	if (unlikely(!dept_working()))
 		return;
@@ -2505,12 +2552,13 @@ void dept_request_event_wait_commit(void)
 	w_fn = dt->stage_w_fn;
 	ip = dt->stage_ip;
 	sched_map = dt->stage_sched_map;
+	timeout = dt->stage_timeout;
 
 	wg = next_wgen();
 	WRITE_ONCE(dt->stage_m.wgen, wg);
 	arch_spin_unlock(&dt->stage_lock);
 
-	__dept_wait(&dt->stage_m, 1UL, ip, w_fn, 0, true, sched_map);
+	__dept_wait(&dt->stage_m, 1UL, ip, w_fn, 0, true, sched_map, timeout);
 exit:
 	dept_exit(flags);
 }
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 3669b069337b..290563fa8b58 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1394,6 +1394,16 @@ config DEPT
 	  noting, to mitigate the impact by the false positives, multi
 	  reporting has been supported.
 
+config DEPT_AGGRESSIVE_TIMEOUT_WAIT
+	bool "Aggressively track even timeout waits"
+	depends on DEPT
+	default n
+	help
+	  Timeout wait doesn't contribute to a deadlock. However,
+	  informing a circular dependency might be helpful for cases
+	  that timeout is used to avoid a deadlock. Say N if you'd like
+	  to avoid verbose reports.
+
 config LOCK_DEBUGGING_SUPPORT
 	bool
 	depends on TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
-- 
2.17.1


^ permalink raw reply related

* [PATCH v17 17/47] dept: apply sdt_might_sleep_{start,end}() to dma fence
From: Byungchul Park @ 2025-10-02  8:12 UTC (permalink / raw)
  To: linux-kernel
  Cc: kernel_team, torvalds, damien.lemoal, linux-ide, adilger.kernel,
	linux-ext4, mingo, peterz, will, tglx, rostedt, joel, sashal,
	daniel.vetter, duyuyang, johannes.berg, tj, tytso, willy, david,
	amir73il, gregkh, kernel-team, linux-mm, akpm, mhocko, minchan,
	hannes, vdavydov.dev, sj, jglisse, dennis, cl, penberg, rientjes,
	vbabka, ngupta, linux-block, josef, linux-fsdevel, jack, jlayton,
	dan.j.williams, hch, djwong, dri-devel, rodrigosiqueiramelo,
	melissa.srw, hamohammed.sa, harry.yoo, chris.p.wilson,
	gwan-gyeong.mun, max.byungchul.park, boqun.feng, longman,
	yunseong.kim, ysk, yeoreum.yun, netdev, matthew.brost, her0gyugyu,
	corbet, catalin.marinas, bp, dave.hansen, x86, hpa, luto,
	sumit.semwal, gustavo, christian.koenig, andi.shyti, arnd,
	lorenzo.stoakes, Liam.Howlett, rppt, surenb, mcgrof, petr.pavlu,
	da.gomez, samitolvanen, paulmck, frederic, neeraj.upadhyay,
	joelagnelf, josh, urezki, mathieu.desnoyers, jiangshanlai,
	qiang.zhang, juri.lelli, vincent.guittot, dietmar.eggemann,
	bsegall, mgorman, vschneid, chuck.lever, neil, okorniev, Dai.Ngo,
	tom, trondmy, anna, kees, bigeasy, clrkwllms, mark.rutland,
	ada.coupriediaz, kristina.martsenko, wangkefeng.wang, broonie,
	kevin.brodsky, dwmw, shakeel.butt, ast, ziy, yuzhao, baolin.wang,
	usamaarif642, joel.granados, richard.weiyang, geert+renesas,
	tim.c.chen, linux, alexander.shishkin, lillian, chenhuacai,
	francesco, guoweikang.kernel, link, jpoimboe, masahiroy, brauner,
	thomas.weissschuh, oleg, mjguzik, andrii, wangfushuai, linux-doc,
	linux-arm-kernel, linux-media, linaro-mm-sig, linux-i2c,
	linux-arch, linux-modules, rcu, linux-nfs, linux-rt-devel
In-Reply-To: <20251002081247.51255-1-byungchul@sk.com>

Make dept able to track dependencies by dma fence waits and signals.

Signed-off-by: Byungchul Park <byungchul@sk.com>
---
 drivers/dma-buf/dma-fence.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index 3f78c56b58dc..787fa10a49f1 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -16,6 +16,7 @@
 #include <linux/dma-fence.h>
 #include <linux/sched/signal.h>
 #include <linux/seq_file.h>
+#include <linux/dept_sdt.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/dma_fence.h>
@@ -800,6 +801,7 @@ dma_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout)
 	cb.task = current;
 	list_add(&cb.base.node, &fence->cb_list);
 
+	sdt_might_sleep_start(NULL);
 	while (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) && ret > 0) {
 		if (intr)
 			__set_current_state(TASK_INTERRUPTIBLE);
@@ -813,6 +815,7 @@ dma_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout)
 		if (ret > 0 && intr && signal_pending(current))
 			ret = -ERESTARTSYS;
 	}
+	sdt_might_sleep_end();
 
 	if (!list_empty(&cb.base.node))
 		list_del(&cb.base.node);
@@ -902,6 +905,7 @@ dma_fence_wait_any_timeout(struct dma_fence **fences, uint32_t count,
 		}
 	}
 
+	sdt_might_sleep_start(NULL);
 	while (ret > 0) {
 		if (intr)
 			set_current_state(TASK_INTERRUPTIBLE);
@@ -916,6 +920,7 @@ dma_fence_wait_any_timeout(struct dma_fence **fences, uint32_t count,
 		if (ret > 0 && intr && signal_pending(current))
 			ret = -ERESTARTSYS;
 	}
+	sdt_might_sleep_end();
 
 	__set_current_state(TASK_RUNNING);
 
-- 
2.17.1


^ permalink raw reply related

* [PATCH v17 15/47] dept: apply sdt_might_sleep_{start,end}() to waitqueue wait
From: Byungchul Park @ 2025-10-02  8:12 UTC (permalink / raw)
  To: linux-kernel
  Cc: kernel_team, torvalds, damien.lemoal, linux-ide, adilger.kernel,
	linux-ext4, mingo, peterz, will, tglx, rostedt, joel, sashal,
	daniel.vetter, duyuyang, johannes.berg, tj, tytso, willy, david,
	amir73il, gregkh, kernel-team, linux-mm, akpm, mhocko, minchan,
	hannes, vdavydov.dev, sj, jglisse, dennis, cl, penberg, rientjes,
	vbabka, ngupta, linux-block, josef, linux-fsdevel, jack, jlayton,
	dan.j.williams, hch, djwong, dri-devel, rodrigosiqueiramelo,
	melissa.srw, hamohammed.sa, harry.yoo, chris.p.wilson,
	gwan-gyeong.mun, max.byungchul.park, boqun.feng, longman,
	yunseong.kim, ysk, yeoreum.yun, netdev, matthew.brost, her0gyugyu,
	corbet, catalin.marinas, bp, dave.hansen, x86, hpa, luto,
	sumit.semwal, gustavo, christian.koenig, andi.shyti, arnd,
	lorenzo.stoakes, Liam.Howlett, rppt, surenb, mcgrof, petr.pavlu,
	da.gomez, samitolvanen, paulmck, frederic, neeraj.upadhyay,
	joelagnelf, josh, urezki, mathieu.desnoyers, jiangshanlai,
	qiang.zhang, juri.lelli, vincent.guittot, dietmar.eggemann,
	bsegall, mgorman, vschneid, chuck.lever, neil, okorniev, Dai.Ngo,
	tom, trondmy, anna, kees, bigeasy, clrkwllms, mark.rutland,
	ada.coupriediaz, kristina.martsenko, wangkefeng.wang, broonie,
	kevin.brodsky, dwmw, shakeel.butt, ast, ziy, yuzhao, baolin.wang,
	usamaarif642, joel.granados, richard.weiyang, geert+renesas,
	tim.c.chen, linux, alexander.shishkin, lillian, chenhuacai,
	francesco, guoweikang.kernel, link, jpoimboe, masahiroy, brauner,
	thomas.weissschuh, oleg, mjguzik, andrii, wangfushuai, linux-doc,
	linux-arm-kernel, linux-media, linaro-mm-sig, linux-i2c,
	linux-arch, linux-modules, rcu, linux-nfs, linux-rt-devel
In-Reply-To: <20251002081247.51255-1-byungchul@sk.com>

Make dept able to track dependencies by waitqueue waits.

Signed-off-by: Byungchul Park <byungchul@sk.com>
---
 include/linux/wait.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 09855d819418..65dd50f25e54 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -7,6 +7,7 @@
 #include <linux/list.h>
 #include <linux/stddef.h>
 #include <linux/spinlock.h>
+#include <linux/dept_sdt.h>
 
 #include <asm/current.h>
 
@@ -305,6 +306,7 @@ extern void init_wait_entry(struct wait_queue_entry *wq_entry, int flags);
 	struct wait_queue_entry __wq_entry;					\
 	long __ret = ret;	/* explicit shadow */				\
 										\
+	sdt_might_sleep_start(NULL);						\
 	init_wait_entry(&__wq_entry, exclusive ? WQ_FLAG_EXCLUSIVE : 0);	\
 	for (;;) {								\
 		long __int = prepare_to_wait_event(&wq_head, &__wq_entry, state);\
@@ -323,6 +325,7 @@ extern void init_wait_entry(struct wait_queue_entry *wq_entry, int flags);
 			break;							\
 	}									\
 	finish_wait(&wq_head, &__wq_entry);					\
+	sdt_might_sleep_end();							\
 __out:	__ret;									\
 })
 
-- 
2.17.1


^ permalink raw reply related

* [PATCH v17 16/47] dept: apply sdt_might_sleep_{start,end}() to hashed-waitqueue wait
From: Byungchul Park @ 2025-10-02  8:12 UTC (permalink / raw)
  To: linux-kernel
  Cc: kernel_team, torvalds, damien.lemoal, linux-ide, adilger.kernel,
	linux-ext4, mingo, peterz, will, tglx, rostedt, joel, sashal,
	daniel.vetter, duyuyang, johannes.berg, tj, tytso, willy, david,
	amir73il, gregkh, kernel-team, linux-mm, akpm, mhocko, minchan,
	hannes, vdavydov.dev, sj, jglisse, dennis, cl, penberg, rientjes,
	vbabka, ngupta, linux-block, josef, linux-fsdevel, jack, jlayton,
	dan.j.williams, hch, djwong, dri-devel, rodrigosiqueiramelo,
	melissa.srw, hamohammed.sa, harry.yoo, chris.p.wilson,
	gwan-gyeong.mun, max.byungchul.park, boqun.feng, longman,
	yunseong.kim, ysk, yeoreum.yun, netdev, matthew.brost, her0gyugyu,
	corbet, catalin.marinas, bp, dave.hansen, x86, hpa, luto,
	sumit.semwal, gustavo, christian.koenig, andi.shyti, arnd,
	lorenzo.stoakes, Liam.Howlett, rppt, surenb, mcgrof, petr.pavlu,
	da.gomez, samitolvanen, paulmck, frederic, neeraj.upadhyay,
	joelagnelf, josh, urezki, mathieu.desnoyers, jiangshanlai,
	qiang.zhang, juri.lelli, vincent.guittot, dietmar.eggemann,
	bsegall, mgorman, vschneid, chuck.lever, neil, okorniev, Dai.Ngo,
	tom, trondmy, anna, kees, bigeasy, clrkwllms, mark.rutland,
	ada.coupriediaz, kristina.martsenko, wangkefeng.wang, broonie,
	kevin.brodsky, dwmw, shakeel.butt, ast, ziy, yuzhao, baolin.wang,
	usamaarif642, joel.granados, richard.weiyang, geert+renesas,
	tim.c.chen, linux, alexander.shishkin, lillian, chenhuacai,
	francesco, guoweikang.kernel, link, jpoimboe, masahiroy, brauner,
	thomas.weissschuh, oleg, mjguzik, andrii, wangfushuai, linux-doc,
	linux-arm-kernel, linux-media, linaro-mm-sig, linux-i2c,
	linux-arch, linux-modules, rcu, linux-nfs, linux-rt-devel
In-Reply-To: <20251002081247.51255-1-byungchul@sk.com>

Make dept able to track dependencies by hashed-waitqueue waits.

Signed-off-by: Byungchul Park <byungchul@sk.com>
---
 include/linux/wait_bit.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h
index 9e29d79fc790..179a616ad245 100644
--- a/include/linux/wait_bit.h
+++ b/include/linux/wait_bit.h
@@ -6,6 +6,7 @@
  * Linux wait-bit related types and methods:
  */
 #include <linux/wait.h>
+#include <linux/dept_sdt.h>
 
 struct wait_bit_key {
 	unsigned long		*flags;
@@ -257,6 +258,7 @@ extern wait_queue_head_t *__var_waitqueue(void *p);
 	struct wait_bit_queue_entry __wbq_entry;			\
 	long __ret = ret; /* explicit shadow */				\
 									\
+	sdt_might_sleep_start(NULL);					\
 	init_wait_var_entry(&__wbq_entry, var,				\
 			    exclusive ? WQ_FLAG_EXCLUSIVE : 0);		\
 	for (;;) {							\
@@ -274,6 +276,7 @@ extern wait_queue_head_t *__var_waitqueue(void *p);
 		cmd;							\
 	}								\
 	finish_wait(__wq_head, &__wbq_entry.wq_entry);			\
+	sdt_might_sleep_end();						\
 __out:	__ret;								\
 })
 
-- 
2.17.1


^ permalink raw reply related

* [PATCH v17 14/47] dept: apply sdt_might_sleep_{start,end}() to swait
From: Byungchul Park @ 2025-10-02  8:12 UTC (permalink / raw)
  To: linux-kernel
  Cc: kernel_team, torvalds, damien.lemoal, linux-ide, adilger.kernel,
	linux-ext4, mingo, peterz, will, tglx, rostedt, joel, sashal,
	daniel.vetter, duyuyang, johannes.berg, tj, tytso, willy, david,
	amir73il, gregkh, kernel-team, linux-mm, akpm, mhocko, minchan,
	hannes, vdavydov.dev, sj, jglisse, dennis, cl, penberg, rientjes,
	vbabka, ngupta, linux-block, josef, linux-fsdevel, jack, jlayton,
	dan.j.williams, hch, djwong, dri-devel, rodrigosiqueiramelo,
	melissa.srw, hamohammed.sa, harry.yoo, chris.p.wilson,
	gwan-gyeong.mun, max.byungchul.park, boqun.feng, longman,
	yunseong.kim, ysk, yeoreum.yun, netdev, matthew.brost, her0gyugyu,
	corbet, catalin.marinas, bp, dave.hansen, x86, hpa, luto,
	sumit.semwal, gustavo, christian.koenig, andi.shyti, arnd,
	lorenzo.stoakes, Liam.Howlett, rppt, surenb, mcgrof, petr.pavlu,
	da.gomez, samitolvanen, paulmck, frederic, neeraj.upadhyay,
	joelagnelf, josh, urezki, mathieu.desnoyers, jiangshanlai,
	qiang.zhang, juri.lelli, vincent.guittot, dietmar.eggemann,
	bsegall, mgorman, vschneid, chuck.lever, neil, okorniev, Dai.Ngo,
	tom, trondmy, anna, kees, bigeasy, clrkwllms, mark.rutland,
	ada.coupriediaz, kristina.martsenko, wangkefeng.wang, broonie,
	kevin.brodsky, dwmw, shakeel.butt, ast, ziy, yuzhao, baolin.wang,
	usamaarif642, joel.granados, richard.weiyang, geert+renesas,
	tim.c.chen, linux, alexander.shishkin, lillian, chenhuacai,
	francesco, guoweikang.kernel, link, jpoimboe, masahiroy, brauner,
	thomas.weissschuh, oleg, mjguzik, andrii, wangfushuai, linux-doc,
	linux-arm-kernel, linux-media, linaro-mm-sig, linux-i2c,
	linux-arch, linux-modules, rcu, linux-nfs, linux-rt-devel
In-Reply-To: <20251002081247.51255-1-byungchul@sk.com>

Make dept able to track dependencies by swaits.

Signed-off-by: Byungchul Park <byungchul@sk.com>
---
 include/linux/swait.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/include/linux/swait.h b/include/linux/swait.h
index d324419482a0..277ac74f61c3 100644
--- a/include/linux/swait.h
+++ b/include/linux/swait.h
@@ -6,6 +6,7 @@
 #include <linux/stddef.h>
 #include <linux/spinlock.h>
 #include <linux/wait.h>
+#include <linux/dept_sdt.h>
 #include <asm/current.h>
 
 /*
@@ -161,6 +162,7 @@ extern void finish_swait(struct swait_queue_head *q, struct swait_queue *wait);
 	struct swait_queue __wait;					\
 	long __ret = ret;						\
 									\
+	sdt_might_sleep_start(NULL);					\
 	INIT_LIST_HEAD(&__wait.task_list);				\
 	for (;;) {							\
 		long __int = prepare_to_swait_event(&wq, &__wait, state);\
@@ -176,6 +178,7 @@ extern void finish_swait(struct swait_queue_head *q, struct swait_queue *wait);
 		cmd;							\
 	}								\
 	finish_swait(&wq, &__wait);					\
+	sdt_might_sleep_end();						\
 __out:	__ret;								\
 })
 
-- 
2.17.1


^ permalink raw reply related

* [PATCH v17 13/47] dept: apply sdt_might_sleep_{start,end}() to wait_for_completion()/complete()
From: Byungchul Park @ 2025-10-02  8:12 UTC (permalink / raw)
  To: linux-kernel
  Cc: kernel_team, torvalds, damien.lemoal, linux-ide, adilger.kernel,
	linux-ext4, mingo, peterz, will, tglx, rostedt, joel, sashal,
	daniel.vetter, duyuyang, johannes.berg, tj, tytso, willy, david,
	amir73il, gregkh, kernel-team, linux-mm, akpm, mhocko, minchan,
	hannes, vdavydov.dev, sj, jglisse, dennis, cl, penberg, rientjes,
	vbabka, ngupta, linux-block, josef, linux-fsdevel, jack, jlayton,
	dan.j.williams, hch, djwong, dri-devel, rodrigosiqueiramelo,
	melissa.srw, hamohammed.sa, harry.yoo, chris.p.wilson,
	gwan-gyeong.mun, max.byungchul.park, boqun.feng, longman,
	yunseong.kim, ysk, yeoreum.yun, netdev, matthew.brost, her0gyugyu,
	corbet, catalin.marinas, bp, dave.hansen, x86, hpa, luto,
	sumit.semwal, gustavo, christian.koenig, andi.shyti, arnd,
	lorenzo.stoakes, Liam.Howlett, rppt, surenb, mcgrof, petr.pavlu,
	da.gomez, samitolvanen, paulmck, frederic, neeraj.upadhyay,
	joelagnelf, josh, urezki, mathieu.desnoyers, jiangshanlai,
	qiang.zhang, juri.lelli, vincent.guittot, dietmar.eggemann,
	bsegall, mgorman, vschneid, chuck.lever, neil, okorniev, Dai.Ngo,
	tom, trondmy, anna, kees, bigeasy, clrkwllms, mark.rutland,
	ada.coupriediaz, kristina.martsenko, wangkefeng.wang, broonie,
	kevin.brodsky, dwmw, shakeel.butt, ast, ziy, yuzhao, baolin.wang,
	usamaarif642, joel.granados, richard.weiyang, geert+renesas,
	tim.c.chen, linux, alexander.shishkin, lillian, chenhuacai,
	francesco, guoweikang.kernel, link, jpoimboe, masahiroy, brauner,
	thomas.weissschuh, oleg, mjguzik, andrii, wangfushuai, linux-doc,
	linux-arm-kernel, linux-media, linaro-mm-sig, linux-i2c,
	linux-arch, linux-modules, rcu, linux-nfs, linux-rt-devel
In-Reply-To: <20251002081247.51255-1-byungchul@sk.com>

Make dept able to track dependencies by wait_for_completion()/complete().

Signed-off-by: Byungchul Park <byungchul@sk.com>
---
 include/linux/completion.h | 30 +++++++++++++++++++++++++-----
 1 file changed, 25 insertions(+), 5 deletions(-)

diff --git a/include/linux/completion.h b/include/linux/completion.h
index fb2915676574..bd2c207481d6 100644
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -10,6 +10,7 @@
  */
 
 #include <linux/swait.h>
+#include <linux/dept_sdt.h>
 
 /*
  * struct completion - structure used to maintain state for a "completion"
@@ -26,14 +27,33 @@
 struct completion {
 	unsigned int done;
 	struct swait_queue_head wait;
+	struct dept_map dmap;
 };
 
+#define init_completion(x)				\
+do {							\
+	sdt_map_init(&(x)->dmap);			\
+	__init_completion(x);				\
+} while (0)
+
+/*
+ * XXX: No use cases for now. Fill the body when needed.
+ */
 #define init_completion_map(x, m) init_completion(x)
-static inline void complete_acquire(struct completion *x) {}
-static inline void complete_release(struct completion *x) {}
+
+static inline void complete_acquire(struct completion *x)
+{
+	sdt_might_sleep_start(&x->dmap);
+}
+
+static inline void complete_release(struct completion *x)
+{
+	sdt_might_sleep_end();
+}
 
 #define COMPLETION_INITIALIZER(work) \
-	{ 0, __SWAIT_QUEUE_HEAD_INITIALIZER((work).wait) }
+	{ 0, __SWAIT_QUEUE_HEAD_INITIALIZER((work).wait), \
+	  .dmap = DEPT_MAP_INITIALIZER(work, NULL), }
 
 #define COMPLETION_INITIALIZER_ONSTACK_MAP(work, map) \
 	(*({ init_completion_map(&(work), &(map)); &(work); }))
@@ -75,13 +95,13 @@ static inline void complete_release(struct completion *x) {}
 #endif
 
 /**
- * init_completion - Initialize a dynamically allocated completion
+ * __init_completion - Initialize a dynamically allocated completion
  * @x:  pointer to completion structure that is to be initialized
  *
  * This inline function will initialize a dynamically created completion
  * structure.
  */
-static inline void init_completion(struct completion *x)
+static inline void __init_completion(struct completion *x)
 {
 	x->done = 0;
 	init_swait_queue_head(&x->wait);
-- 
2.17.1


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox