[PATCH 0/4] lockdep cmp fn conversions

netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH 0/4] lockdep cmp fn conversions
@ 2024-01-27  2:01 Kent Overstreet
  2024-01-27  2:01 ` [PATCH 1/4] fs/pipe: Convert to lockdep_cmp_fn Kent Overstreet
                   ` (3 more replies)
  0 siblings, 4 replies; 12+ messages in thread
From: Kent Overstreet @ 2024-01-27  2:01 UTC (permalink / raw)
  To: linux-kernel, netdev, linux-fsdevel; +Cc: Kent Overstreet, peterz, boqun.feng

rationale:
*_lock_nested() is fundamentally broken - in order for lockdep to work
we need to be able to check that we're following some defined ordering,
and it's not possible to define a total ordering of an arbitrary number
of objects with only a small fixed size enum.

so it needs to go. awhile back I added the ability to set a comparison
function for a lock class, and this is the start of hopefully a slow
steady trickle of patches as time allows to convert code to use it.

Kent Overstreet (4):
  fs/pipe: Convert to lockdep_cmp_fn
  pktcdvd: kill mutex_lock_nested() usage
  net: Convert sk->sk_peer_lock to lock_set_cmp_fn_ptr_order()
  af_unix: convert to lock_cmp_fn

 drivers/block/pktcdvd.c  |  8 ++---
 fs/pipe.c                | 77 ++++++++++++++++------------------------
 include/linux/lockdep.h  |  3 ++
 include/net/af_unix.h    |  3 --
 kernel/locking/lockdep.c |  6 ++++
 net/core/sock.c          |  1 +
 net/unix/af_unix.c       | 24 ++++++-------
 net/unix/diag.c          |  2 +-
 8 files changed, 55 insertions(+), 69 deletions(-)

-- 
2.43.0

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 1/4] fs/pipe: Convert to lockdep_cmp_fn
  2024-01-27  2:01 [PATCH 0/4] lockdep cmp fn conversions Kent Overstreet
@ 2024-01-27  2:01 ` Kent Overstreet
  2024-01-29 14:09   ` (subset) " Christian Brauner
  2024-01-29 15:21   ` Jan Kara
  2024-01-27  2:01 ` [PATCH 2/4] pktcdvd: kill mutex_lock_nested() usage Kent Overstreet
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 12+ messages in thread
From: Kent Overstreet @ 2024-01-27  2:01 UTC (permalink / raw)
  To: linux-kernel, netdev, linux-fsdevel
  Cc: Kent Overstreet, peterz, boqun.feng, Alexander Viro,
	Christian Brauner, Jan Kara

*_lock_nested() is fundamentally broken; lockdep needs to check lock
ordering, but we cannot device a total ordering on an unbounded number
of elements with only a few subclasses.

the replacement is to define lock ordering with a proper comparison
function.

fs/pipe.c was already doing everything correctly otherwise, nothing
much changes here.

Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/pipe.c | 81 +++++++++++++++++++++++++------------------------------
 1 file changed, 36 insertions(+), 45 deletions(-)

diff --git a/fs/pipe.c b/fs/pipe.c
index f1adbfe743d4..50c8a8596b52 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -76,18 +76,20 @@ static unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
  * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
  */
 
-static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass)
+#define cmp_int(l, r)		((l > r) - (l < r))
+
+#ifdef CONFIG_PROVE_LOCKING
+static int pipe_lock_cmp_fn(const struct lockdep_map *a,
+			    const struct lockdep_map *b)
 {
-	if (pipe->files)
-		mutex_lock_nested(&pipe->mutex, subclass);
+	return cmp_int((unsigned long) a, (unsigned long) b);
 }
+#endif
 
 void pipe_lock(struct pipe_inode_info *pipe)
 {
-	/*
-	 * pipe_lock() nests non-pipe inode locks (for writing to a file)
-	 */
-	pipe_lock_nested(pipe, I_MUTEX_PARENT);
+	if (pipe->files)
+		mutex_lock(&pipe->mutex);
 }
 EXPORT_SYMBOL(pipe_lock);
 
@@ -98,28 +100,16 @@ void pipe_unlock(struct pipe_inode_info *pipe)
 }
 EXPORT_SYMBOL(pipe_unlock);
 
-static inline void __pipe_lock(struct pipe_inode_info *pipe)
-{
-	mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
-}
-
-static inline void __pipe_unlock(struct pipe_inode_info *pipe)
-{
-	mutex_unlock(&pipe->mutex);
-}
-
 void pipe_double_lock(struct pipe_inode_info *pipe1,
 		      struct pipe_inode_info *pipe2)
 {
 	BUG_ON(pipe1 == pipe2);
 
-	if (pipe1 < pipe2) {
-		pipe_lock_nested(pipe1, I_MUTEX_PARENT);
-		pipe_lock_nested(pipe2, I_MUTEX_CHILD);
-	} else {
-		pipe_lock_nested(pipe2, I_MUTEX_PARENT);
-		pipe_lock_nested(pipe1, I_MUTEX_CHILD);
-	}
+	if (pipe1 > pipe2)
+		swap(pipe1, pipe2);
+
+	pipe_lock(pipe1);
+	pipe_lock(pipe2);
 }
 
 static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
@@ -271,7 +261,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
 		return 0;
 
 	ret = 0;
-	__pipe_lock(pipe);
+	mutex_lock(&pipe->mutex);
 
 	/*
 	 * We only wake up writers if the pipe was full when we started
@@ -368,7 +358,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
 			ret = -EAGAIN;
 			break;
 		}
-		__pipe_unlock(pipe);
+		mutex_unlock(&pipe->mutex);
 
 		/*
 		 * We only get here if we didn't actually read anything.
@@ -400,13 +390,13 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
 		if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0)
 			return -ERESTARTSYS;
 
-		__pipe_lock(pipe);
+		mutex_lock(&pipe->mutex);
 		was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
 		wake_next_reader = true;
 	}
 	if (pipe_empty(pipe->head, pipe->tail))
 		wake_next_reader = false;
-	__pipe_unlock(pipe);
+	mutex_unlock(&pipe->mutex);
 
 	if (was_full)
 		wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
@@ -462,7 +452,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
 	if (unlikely(total_len == 0))
 		return 0;
 
-	__pipe_lock(pipe);
+	mutex_lock(&pipe->mutex);
 
 	if (!pipe->readers) {
 		send_sig(SIGPIPE, current, 0);
@@ -582,19 +572,19 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
 		 * after waiting we need to re-check whether the pipe
 		 * become empty while we dropped the lock.
 		 */
-		__pipe_unlock(pipe);
+		mutex_unlock(&pipe->mutex);
 		if (was_empty)
 			wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
 		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
 		wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe));
-		__pipe_lock(pipe);
+		mutex_lock(&pipe->mutex);
 		was_empty = pipe_empty(pipe->head, pipe->tail);
 		wake_next_writer = true;
 	}
 out:
 	if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
 		wake_next_writer = false;
-	__pipe_unlock(pipe);
+	mutex_unlock(&pipe->mutex);
 
 	/*
 	 * If we do do a wakeup event, we do a 'sync' wakeup, because we
@@ -629,7 +619,7 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 
 	switch (cmd) {
 	case FIONREAD:
-		__pipe_lock(pipe);
+		mutex_lock(&pipe->mutex);
 		count = 0;
 		head = pipe->head;
 		tail = pipe->tail;
@@ -639,16 +629,16 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 			count += pipe->bufs[tail & mask].len;
 			tail++;
 		}
-		__pipe_unlock(pipe);
+		mutex_unlock(&pipe->mutex);
 
 		return put_user(count, (int __user *)arg);
 
 #ifdef CONFIG_WATCH_QUEUE
 	case IOC_WATCH_QUEUE_SET_SIZE: {
 		int ret;
-		__pipe_lock(pipe);
+		mutex_lock(&pipe->mutex);
 		ret = watch_queue_set_size(pipe, arg);
-		__pipe_unlock(pipe);
+		mutex_unlock(&pipe->mutex);
 		return ret;
 	}
 
@@ -734,7 +724,7 @@ pipe_release(struct inode *inode, struct file *file)
 {
 	struct pipe_inode_info *pipe = file->private_data;
 
-	__pipe_lock(pipe);
+	mutex_lock(&pipe->mutex);
 	if (file->f_mode & FMODE_READ)
 		pipe->readers--;
 	if (file->f_mode & FMODE_WRITE)
@@ -747,7 +737,7 @@ pipe_release(struct inode *inode, struct file *file)
 		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
 		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
 	}
-	__pipe_unlock(pipe);
+	mutex_unlock(&pipe->mutex);
 
 	put_pipe_info(inode, pipe);
 	return 0;
@@ -759,7 +749,7 @@ pipe_fasync(int fd, struct file *filp, int on)
 	struct pipe_inode_info *pipe = filp->private_data;
 	int retval = 0;
 
-	__pipe_lock(pipe);
+	mutex_lock(&pipe->mutex);
 	if (filp->f_mode & FMODE_READ)
 		retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
 	if ((filp->f_mode & FMODE_WRITE) && retval >= 0) {
@@ -768,7 +758,7 @@ pipe_fasync(int fd, struct file *filp, int on)
 			/* this can happen only if on == T */
 			fasync_helper(-1, filp, 0, &pipe->fasync_readers);
 	}
-	__pipe_unlock(pipe);
+	mutex_unlock(&pipe->mutex);
 	return retval;
 }
 
@@ -834,6 +824,7 @@ struct pipe_inode_info *alloc_pipe_info(void)
 		pipe->nr_accounted = pipe_bufs;
 		pipe->user = user;
 		mutex_init(&pipe->mutex);
+		lock_set_cmp_fn(&pipe->mutex, pipe_lock_cmp_fn, NULL);
 		return pipe;
 	}
 
@@ -1144,7 +1135,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
 	filp->private_data = pipe;
 	/* OK, we have a pipe and it's pinned down */
 
-	__pipe_lock(pipe);
+	mutex_lock(&pipe->mutex);
 
 	/* We can only do regular read/write on fifos */
 	stream_open(inode, filp);
@@ -1214,7 +1205,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
 	}
 
 	/* Ok! */
-	__pipe_unlock(pipe);
+	mutex_unlock(&pipe->mutex);
 	return 0;
 
 err_rd:
@@ -1230,7 +1221,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
 	goto err;
 
 err:
-	__pipe_unlock(pipe);
+	mutex_unlock(&pipe->mutex);
 
 	put_pipe_info(inode, pipe);
 	return ret;
@@ -1411,7 +1402,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg)
 	if (!pipe)
 		return -EBADF;
 
-	__pipe_lock(pipe);
+	mutex_lock(&pipe->mutex);
 
 	switch (cmd) {
 	case F_SETPIPE_SZ:
@@ -1425,7 +1416,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg)
 		break;
 	}
 
-	__pipe_unlock(pipe);
+	mutex_unlock(&pipe->mutex);
 	return ret;
 }
 
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 2/4] pktcdvd: kill mutex_lock_nested() usage
  2024-01-27  2:01 [PATCH 0/4] lockdep cmp fn conversions Kent Overstreet
  2024-01-27  2:01 ` [PATCH 1/4] fs/pipe: Convert to lockdep_cmp_fn Kent Overstreet
@ 2024-01-27  2:01 ` Kent Overstreet
  2024-01-27  2:01 ` [PATCH 3/4] net: Convert sk->sk_peer_lock to lock_set_cmp_fn_ptr_order() Kent Overstreet
  2024-01-27  2:01 ` [PATCH 4/4] af_unix: convert to lock_cmp_fn Kent Overstreet
  3 siblings, 0 replies; 12+ messages in thread
From: Kent Overstreet @ 2024-01-27  2:01 UTC (permalink / raw)
  To: linux-kernel, netdev, linux-fsdevel
  Cc: Kent Overstreet, peterz, boqun.feng, linux-block, Jens Axboe

Unecessary, we're not actually taking nested locks of the same type.

Cc: linux-block@vger.kernel.org
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 drivers/block/pktcdvd.c  |  8 ++++----
 fs/pipe.c                | 10 +---------
 include/linux/lockdep.h  |  3 +++
 kernel/locking/lockdep.c |  6 ++++++
 4 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index d56d972aadb3..2eb68a624fda 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -332,7 +332,7 @@ static ssize_t device_map_show(const struct class *c, const struct class_attribu
 {
 	int n = 0;
 	int idx;
-	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+	mutex_lock(&ctl_mutex);
 	for (idx = 0; idx < MAX_WRITERS; idx++) {
 		struct pktcdvd_device *pd = pkt_devs[idx];
 		if (!pd)
@@ -2639,7 +2639,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
 	struct pktcdvd_device *pd;
 	struct gendisk *disk;
 
-	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+	mutex_lock(&ctl_mutex);
 
 	for (idx = 0; idx < MAX_WRITERS; idx++)
 		if (!pkt_devs[idx])
@@ -2729,7 +2729,7 @@ static int pkt_remove_dev(dev_t pkt_dev)
 	int idx;
 	int ret = 0;
 
-	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+	mutex_lock(&ctl_mutex);
 
 	for (idx = 0; idx < MAX_WRITERS; idx++) {
 		pd = pkt_devs[idx];
@@ -2780,7 +2780,7 @@ static void pkt_get_status(struct pkt_ctrl_command *ctrl_cmd)
 {
 	struct pktcdvd_device *pd;
 
-	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+	mutex_lock(&ctl_mutex);
 
 	pd = pkt_find_dev_from_minor(ctrl_cmd->dev_index);
 	if (pd) {
diff --git a/fs/pipe.c b/fs/pipe.c
index 50c8a8596b52..abe171566015 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -78,14 +78,6 @@ static unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
 
 #define cmp_int(l, r)		((l > r) - (l < r))
 
-#ifdef CONFIG_PROVE_LOCKING
-static int pipe_lock_cmp_fn(const struct lockdep_map *a,
-			    const struct lockdep_map *b)
-{
-	return cmp_int((unsigned long) a, (unsigned long) b);
-}
-#endif
-
 void pipe_lock(struct pipe_inode_info *pipe)
 {
 	if (pipe->files)
@@ -824,7 +816,7 @@ struct pipe_inode_info *alloc_pipe_info(void)
 		pipe->nr_accounted = pipe_bufs;
 		pipe->user = user;
 		mutex_init(&pipe->mutex);
-		lock_set_cmp_fn(&pipe->mutex, pipe_lock_cmp_fn, NULL);
+		lock_set_cmp_fn_ptr_order(&pipe->mutex);
 		return pipe;
 	}
 
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 08b0d1d9d78b..e0b121f96c80 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -391,6 +391,7 @@ extern int lockdep_is_held(const void *);
 #endif /* !LOCKDEP */
 
 #ifdef CONFIG_PROVE_LOCKING
+int lockdep_ptr_order_cmp_fn(const struct lockdep_map *, const struct lockdep_map *);
 void lockdep_set_lock_cmp_fn(struct lockdep_map *, lock_cmp_fn, lock_print_fn);
 
 #define lock_set_cmp_fn(lock, ...)	lockdep_set_lock_cmp_fn(&(lock)->dep_map, __VA_ARGS__)
@@ -398,6 +399,8 @@ void lockdep_set_lock_cmp_fn(struct lockdep_map *, lock_cmp_fn, lock_print_fn);
 #define lock_set_cmp_fn(lock, ...)	do { } while (0)
 #endif
 
+#define lock_set_cmp_fn_ptr_order(lock)	lock_set_cmp_fn(lock, lockdep_ptr_order_cmp_fn);
+
 enum xhlock_context_t {
 	XHLOCK_HARD,
 	XHLOCK_SOFT,
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 151bd3de5936..5630be7f5cb2 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -4919,6 +4919,12 @@ struct lock_class_key __lockdep_no_validate__;
 EXPORT_SYMBOL_GPL(__lockdep_no_validate__);
 
 #ifdef CONFIG_PROVE_LOCKING
+int lockdep_ptr_order_cmp_fn(const struct lockdep_map *a,
+			     const struct lockdep_map *b)
+{
+	return cmp_int((unsigned long) a, (unsigned long) b);
+}
+
 void lockdep_set_lock_cmp_fn(struct lockdep_map *lock, lock_cmp_fn cmp_fn,
 			     lock_print_fn print_fn)
 {
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 3/4] net: Convert sk->sk_peer_lock to lock_set_cmp_fn_ptr_order()
  2024-01-27  2:01 [PATCH 0/4] lockdep cmp fn conversions Kent Overstreet
  2024-01-27  2:01 ` [PATCH 1/4] fs/pipe: Convert to lockdep_cmp_fn Kent Overstreet
  2024-01-27  2:01 ` [PATCH 2/4] pktcdvd: kill mutex_lock_nested() usage Kent Overstreet
@ 2024-01-27  2:01 ` Kent Overstreet
  2024-01-27  2:01 ` [PATCH 4/4] af_unix: convert to lock_cmp_fn Kent Overstreet
  3 siblings, 0 replies; 12+ messages in thread
From: Kent Overstreet @ 2024-01-27  2:01 UTC (permalink / raw)
  To: linux-kernel, netdev, linux-fsdevel; +Cc: Kent Overstreet, peterz, boqun.feng

Cc: netdev@vger.kernel.org
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 net/core/sock.c    | 1 +
 net/unix/af_unix.c | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/core/sock.c b/net/core/sock.c
index 158dbdebce6a..da7360c0f454 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -3474,6 +3474,7 @@ void sock_init_data_uid(struct socket *sock, struct sock *sk, kuid_t uid)
 	sk->sk_peer_pid 	=	NULL;
 	sk->sk_peer_cred	=	NULL;
 	spin_lock_init(&sk->sk_peer_lock);
+	lock_set_cmp_fn_ptr_order(&sk->sk_peer_lock);
 
 	sk->sk_write_pending	=	0;
 	sk->sk_rcvlowat		=	1;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index ac1f2bc18fc9..d013de3c5490 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -706,10 +706,10 @@ static void copy_peercred(struct sock *sk, struct sock *peersk)
 
 	if (sk < peersk) {
 		spin_lock(&sk->sk_peer_lock);
-		spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
+		spin_lock(&peersk->sk_peer_lock);
 	} else {
 		spin_lock(&peersk->sk_peer_lock);
-		spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
+		spin_lock(&sk->sk_peer_lock);
 	}
 	old_pid = sk->sk_peer_pid;
 	old_cred = sk->sk_peer_cred;
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 4/4] af_unix: convert to lock_cmp_fn
  2024-01-27  2:01 [PATCH 0/4] lockdep cmp fn conversions Kent Overstreet
                   ` (2 preceding siblings ...)
  2024-01-27  2:01 ` [PATCH 3/4] net: Convert sk->sk_peer_lock to lock_set_cmp_fn_ptr_order() Kent Overstreet
@ 2024-01-27  2:01 ` Kent Overstreet
  3 siblings, 0 replies; 12+ messages in thread
From: Kent Overstreet @ 2024-01-27  2:01 UTC (permalink / raw)
  To: linux-kernel, netdev, linux-fsdevel; +Cc: Kent Overstreet, peterz, boqun.feng

Kill
 - unix_state_lock_nested
 - _nested usage for net->unx.table.locks[].

replace both with lock_set_cmp_fn_ptr_order(&u->lock).

The lock ordering in sk_diag_dump_icons() looks suspicious; this may
turn up a real issue.

Cc: netdev@vger.kernel.org
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 include/net/af_unix.h |  3 ---
 net/unix/af_unix.c    | 20 ++++++++------------
 net/unix/diag.c       |  2 +-
 3 files changed, 9 insertions(+), 16 deletions(-)

diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 49c4640027d8..4eff0a089640 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -48,9 +48,6 @@ struct scm_stat {
 
 #define unix_state_lock(s)	spin_lock(&unix_sk(s)->lock)
 #define unix_state_unlock(s)	spin_unlock(&unix_sk(s)->lock)
-#define unix_state_lock_nested(s) \
-				spin_lock_nested(&unix_sk(s)->lock, \
-				SINGLE_DEPTH_NESTING)
 
 /* The AF_UNIX socket */
 struct unix_sock {
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index d013de3c5490..1a0d273799c1 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -170,7 +170,7 @@ static void unix_table_double_lock(struct net *net,
 		swap(hash1, hash2);
 
 	spin_lock(&net->unx.table.locks[hash1]);
-	spin_lock_nested(&net->unx.table.locks[hash2], SINGLE_DEPTH_NESTING);
+	spin_lock(&net->unx.table.locks[hash2]);
 }
 
 static void unix_table_double_unlock(struct net *net,
@@ -997,6 +997,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern,
 	u->path.dentry = NULL;
 	u->path.mnt = NULL;
 	spin_lock_init(&u->lock);
+	lock_set_cmp_fn_ptr_order(&u->lock);
 	atomic_long_set(&u->inflight, 0);
 	INIT_LIST_HEAD(&u->link);
 	mutex_init(&u->iolock); /* single task reading lock */
@@ -1340,17 +1341,11 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 
 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
 {
-	if (unlikely(sk1 == sk2) || !sk2) {
-		unix_state_lock(sk1);
-		return;
-	}
-	if (sk1 < sk2) {
+	if (sk1 > sk2)
+		swap(sk1, sk2);
+	if (sk1 && sk1 != sk2)
 		unix_state_lock(sk1);
-		unix_state_lock_nested(sk2);
-	} else {
-		unix_state_lock(sk2);
-		unix_state_lock_nested(sk1);
-	}
+	unix_state_lock(sk2);
 }
 
 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
@@ -1591,7 +1586,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
 		goto out_unlock;
 	}
 
-	unix_state_lock_nested(sk);
+	unix_state_lock(sk);
 
 	if (sk->sk_state != st) {
 		unix_state_unlock(sk);
@@ -3575,6 +3570,7 @@ static int __net_init unix_net_init(struct net *net)
 
 	for (i = 0; i < UNIX_HASH_SIZE; i++) {
 		spin_lock_init(&net->unx.table.locks[i]);
+		lock_set_cmp_fn_ptr_order(&net->unx.table.locks[i]);
 		INIT_HLIST_HEAD(&net->unx.table.buckets[i]);
 	}
 
diff --git a/net/unix/diag.c b/net/unix/diag.c
index bec09a3a1d44..8ab5e2217e4c 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -84,7 +84,7 @@ static int sk_diag_dump_icons(struct sock *sk, struct sk_buff *nlskb)
 			 * queue lock. With the other's queue locked it's
 			 * OK to lock the state.
 			 */
-			unix_state_lock_nested(req);
+			unix_state_lock(req);
 			peer = unix_sk(req)->peer;
 			buf[i++] = (peer ? sock_i_ino(peer) : 0);
 			unix_state_unlock(req);
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 4/4] af_unix: convert to lock_cmp_fn
  2024-01-27  2:08 [PATCH 0/4] lockdep cmp fn conversions Kent Overstreet
@ 2024-01-27  2:08 ` Kent Overstreet
  2024-01-28  8:28   ` Kuniyuki Iwashima
  0 siblings, 1 reply; 12+ messages in thread
From: Kent Overstreet @ 2024-01-27  2:08 UTC (permalink / raw)
  To: linux-kernel, netdev, linux-fsdevel; +Cc: Kent Overstreet, peterz, boqun.feng

Kill
 - unix_state_lock_nested
 - _nested usage for net->unx.table.locks[].

replace both with lock_set_cmp_fn_ptr_order(&u->lock).

The lock ordering in sk_diag_dump_icons() looks suspicious; this may
turn up a real issue.

Cc: netdev@vger.kernel.org
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 include/net/af_unix.h |  3 ---
 net/unix/af_unix.c    | 20 ++++++++------------
 net/unix/diag.c       |  2 +-
 3 files changed, 9 insertions(+), 16 deletions(-)

diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 49c4640027d8..4eff0a089640 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -48,9 +48,6 @@ struct scm_stat {
 
 #define unix_state_lock(s)	spin_lock(&unix_sk(s)->lock)
 #define unix_state_unlock(s)	spin_unlock(&unix_sk(s)->lock)
-#define unix_state_lock_nested(s) \
-				spin_lock_nested(&unix_sk(s)->lock, \
-				SINGLE_DEPTH_NESTING)
 
 /* The AF_UNIX socket */
 struct unix_sock {
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index d013de3c5490..1a0d273799c1 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -170,7 +170,7 @@ static void unix_table_double_lock(struct net *net,
 		swap(hash1, hash2);
 
 	spin_lock(&net->unx.table.locks[hash1]);
-	spin_lock_nested(&net->unx.table.locks[hash2], SINGLE_DEPTH_NESTING);
+	spin_lock(&net->unx.table.locks[hash2]);
 }
 
 static void unix_table_double_unlock(struct net *net,
@@ -997,6 +997,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern,
 	u->path.dentry = NULL;
 	u->path.mnt = NULL;
 	spin_lock_init(&u->lock);
+	lock_set_cmp_fn_ptr_order(&u->lock);
 	atomic_long_set(&u->inflight, 0);
 	INIT_LIST_HEAD(&u->link);
 	mutex_init(&u->iolock); /* single task reading lock */
@@ -1340,17 +1341,11 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 
 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
 {
-	if (unlikely(sk1 == sk2) || !sk2) {
-		unix_state_lock(sk1);
-		return;
-	}
-	if (sk1 < sk2) {
+	if (sk1 > sk2)
+		swap(sk1, sk2);
+	if (sk1 && sk1 != sk2)
 		unix_state_lock(sk1);
-		unix_state_lock_nested(sk2);
-	} else {
-		unix_state_lock(sk2);
-		unix_state_lock_nested(sk1);
-	}
+	unix_state_lock(sk2);
 }
 
 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
@@ -1591,7 +1586,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
 		goto out_unlock;
 	}
 
-	unix_state_lock_nested(sk);
+	unix_state_lock(sk);
 
 	if (sk->sk_state != st) {
 		unix_state_unlock(sk);
@@ -3575,6 +3570,7 @@ static int __net_init unix_net_init(struct net *net)
 
 	for (i = 0; i < UNIX_HASH_SIZE; i++) {
 		spin_lock_init(&net->unx.table.locks[i]);
+		lock_set_cmp_fn_ptr_order(&net->unx.table.locks[i]);
 		INIT_HLIST_HEAD(&net->unx.table.buckets[i]);
 	}
 
diff --git a/net/unix/diag.c b/net/unix/diag.c
index bec09a3a1d44..8ab5e2217e4c 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -84,7 +84,7 @@ static int sk_diag_dump_icons(struct sock *sk, struct sk_buff *nlskb)
 			 * queue lock. With the other's queue locked it's
 			 * OK to lock the state.
 			 */
-			unix_state_lock_nested(req);
+			unix_state_lock(req);
 			peer = unix_sk(req)->peer;
 			buf[i++] = (peer ? sock_i_ino(peer) : 0);
 			unix_state_unlock(req);
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH 4/4] af_unix: convert to lock_cmp_fn
  2024-01-27  2:08 ` [PATCH 4/4] af_unix: convert to lock_cmp_fn Kent Overstreet
@ 2024-01-28  8:28   ` Kuniyuki Iwashima
  2024-01-28 19:38     ` Kent Overstreet
  0 siblings, 1 reply; 12+ messages in thread
From: Kuniyuki Iwashima @ 2024-01-28  8:28 UTC (permalink / raw)
  To: kent.overstreet
  Cc: boqun.feng, linux-fsdevel, linux-kernel, netdev, peterz, kuniyu

From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Fri, 26 Jan 2024 21:08:31 -0500
> Kill
>  - unix_state_lock_nested
>  - _nested usage for net->unx.table.locks[].
> 
> replace both with lock_set_cmp_fn_ptr_order(&u->lock).
> 
> The lock ordering in sk_diag_dump_icons() looks suspicious; this may
> turn up a real issue.

Yes, you cannot use lock_cmp_fn() for unix_state_lock_nested().

The lock order in sk_diag_dump_icons() is

  listening socket -> child socket in the listener's queue

, and the inverse order never happens.  ptr comparison does not make
sense in this case, and lockdep will complain about false positive.


> 
> Cc: netdev@vger.kernel.org
> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
> ---
>  include/net/af_unix.h |  3 ---
>  net/unix/af_unix.c    | 20 ++++++++------------
>  net/unix/diag.c       |  2 +-
>  3 files changed, 9 insertions(+), 16 deletions(-)
> 
> diff --git a/include/net/af_unix.h b/include/net/af_unix.h
> index 49c4640027d8..4eff0a089640 100644
> --- a/include/net/af_unix.h
> +++ b/include/net/af_unix.h
> @@ -48,9 +48,6 @@ struct scm_stat {
>  
>  #define unix_state_lock(s)	spin_lock(&unix_sk(s)->lock)
>  #define unix_state_unlock(s)	spin_unlock(&unix_sk(s)->lock)
> -#define unix_state_lock_nested(s) \
> -				spin_lock_nested(&unix_sk(s)->lock, \
> -				SINGLE_DEPTH_NESTING)
>  
>  /* The AF_UNIX socket */
>  struct unix_sock {
> diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
> index d013de3c5490..1a0d273799c1 100644
> --- a/net/unix/af_unix.c
> +++ b/net/unix/af_unix.c
> @@ -170,7 +170,7 @@ static void unix_table_double_lock(struct net *net,
>  		swap(hash1, hash2);
>  
>  	spin_lock(&net->unx.table.locks[hash1]);
> -	spin_lock_nested(&net->unx.table.locks[hash2], SINGLE_DEPTH_NESTING);
> +	spin_lock(&net->unx.table.locks[hash2]);
>  }
>  
>  static void unix_table_double_unlock(struct net *net,
> @@ -997,6 +997,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern,
>  	u->path.dentry = NULL;
>  	u->path.mnt = NULL;
>  	spin_lock_init(&u->lock);
> +	lock_set_cmp_fn_ptr_order(&u->lock);
>  	atomic_long_set(&u->inflight, 0);
>  	INIT_LIST_HEAD(&u->link);
>  	mutex_init(&u->iolock); /* single task reading lock */
> @@ -1340,17 +1341,11 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
>  
>  static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
>  {
> -	if (unlikely(sk1 == sk2) || !sk2) {
> -		unix_state_lock(sk1);
> -		return;
> -	}
> -	if (sk1 < sk2) {
> +	if (sk1 > sk2)
> +		swap(sk1, sk2);
> +	if (sk1 && sk1 != sk2)
>  		unix_state_lock(sk1);
> -		unix_state_lock_nested(sk2);
> -	} else {
> -		unix_state_lock(sk2);
> -		unix_state_lock_nested(sk1);
> -	}
> +	unix_state_lock(sk2);
>  }
>  
>  static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
> @@ -1591,7 +1586,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
>  		goto out_unlock;
>  	}
>  
> -	unix_state_lock_nested(sk);
> +	unix_state_lock(sk);
>  
>  	if (sk->sk_state != st) {
>  		unix_state_unlock(sk);
> @@ -3575,6 +3570,7 @@ static int __net_init unix_net_init(struct net *net)
>  
>  	for (i = 0; i < UNIX_HASH_SIZE; i++) {
>  		spin_lock_init(&net->unx.table.locks[i]);
> +		lock_set_cmp_fn_ptr_order(&net->unx.table.locks[i]);
>  		INIT_HLIST_HEAD(&net->unx.table.buckets[i]);
>  	}
>  
> diff --git a/net/unix/diag.c b/net/unix/diag.c
> index bec09a3a1d44..8ab5e2217e4c 100644
> --- a/net/unix/diag.c
> +++ b/net/unix/diag.c
> @@ -84,7 +84,7 @@ static int sk_diag_dump_icons(struct sock *sk, struct sk_buff *nlskb)
>  			 * queue lock. With the other's queue locked it's
>  			 * OK to lock the state.
>  			 */
> -			unix_state_lock_nested(req);
> +			unix_state_lock(req);
>  			peer = unix_sk(req)->peer;
>  			buf[i++] = (peer ? sock_i_ino(peer) : 0);
>  			unix_state_unlock(req);
> -- 
> 2.43.0

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 4/4] af_unix: convert to lock_cmp_fn
  2024-01-28  8:28   ` Kuniyuki Iwashima
@ 2024-01-28 19:38     ` Kent Overstreet
  2024-01-28 20:56       ` Kuniyuki Iwashima
  0 siblings, 1 reply; 12+ messages in thread
From: Kent Overstreet @ 2024-01-28 19:38 UTC (permalink / raw)
  To: Kuniyuki Iwashima; +Cc: boqun.feng, linux-fsdevel, linux-kernel, netdev, peterz

On Sun, Jan 28, 2024 at 12:28:38AM -0800, Kuniyuki Iwashima wrote:
> From: Kent Overstreet <kent.overstreet@linux.dev>
> Date: Fri, 26 Jan 2024 21:08:31 -0500
> > Kill
> >  - unix_state_lock_nested
> >  - _nested usage for net->unx.table.locks[].
> > 
> > replace both with lock_set_cmp_fn_ptr_order(&u->lock).
> > 
> > The lock ordering in sk_diag_dump_icons() looks suspicious; this may
> > turn up a real issue.
> 
> Yes, you cannot use lock_cmp_fn() for unix_state_lock_nested().
> 
> The lock order in sk_diag_dump_icons() is
> 
>   listening socket -> child socket in the listener's queue
> 
> , and the inverse order never happens.  ptr comparison does not make
> sense in this case, and lockdep will complain about false positive.

Is that a real lock ordering? Is this parent -> child relationship well
defined?

If it is, we should be able to write a lock_cmp_fn for it, as long as
it's not some handwavy "this will never happen but _nested won't check
for it" like I saw elsewhere in the net code... :)

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 4/4] af_unix: convert to lock_cmp_fn
  2024-01-28 19:38     ` Kent Overstreet
@ 2024-01-28 20:56       ` Kuniyuki Iwashima
  2024-01-29  1:34         ` Kent Overstreet
  0 siblings, 1 reply; 12+ messages in thread
From: Kuniyuki Iwashima @ 2024-01-28 20:56 UTC (permalink / raw)
  To: kent.overstreet
  Cc: boqun.feng, kuniyu, linux-fsdevel, linux-kernel, netdev, peterz

From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Sun, 28 Jan 2024 14:38:02 -0500
> On Sun, Jan 28, 2024 at 12:28:38AM -0800, Kuniyuki Iwashima wrote:
> > From: Kent Overstreet <kent.overstreet@linux.dev>
> > Date: Fri, 26 Jan 2024 21:08:31 -0500
> > > Kill
> > >  - unix_state_lock_nested
> > >  - _nested usage for net->unx.table.locks[].
> > > 
> > > replace both with lock_set_cmp_fn_ptr_order(&u->lock).
> > > 
> > > The lock ordering in sk_diag_dump_icons() looks suspicious; this may
> > > turn up a real issue.
> > 
> > Yes, you cannot use lock_cmp_fn() for unix_state_lock_nested().
> > 
> > The lock order in sk_diag_dump_icons() is
> > 
> >   listening socket -> child socket in the listener's queue
> > 
> > , and the inverse order never happens.  ptr comparison does not make
> > sense in this case, and lockdep will complain about false positive.
> 
> Is that a real lock ordering? Is this parent -> child relationship well
> defined?
> 
> If it is, we should be able to write a lock_cmp_fn for it, as long as
> it's not some handwavy "this will never happen but _nested won't check
> for it" like I saw elsewhere in the net code... :)

The problem would be there's no handy way to detect the relationship
except for iterating the queue again.

---8<---
static int unix_state_lock_cmp_fn(const struct lockdep_map *_a,
				  const struct lockdep_map *_b)
{
	const struct unix_sock *a = container_of(_a, struct unix_sock, lock.dep_map);
	const struct unix_sock *b = container_of(_b, struct unix_sock, lock.dep_map);

	if (a->sk.sk_state == TCP_LISTEN && b->sk.sk_state == TCP_ESTABLISHED) {
		/* check if b is a's cihld */
	}

	/* otherwise, ptr comparison here. */
}
---8<---


This can be resolved by a patch like this, which is in my local tree
for another series.

So, after posting the series, I can revisit this and write lock_cmp_fn
for u->lock.

---8<---
commit 12d39766b06068fda5987f4e7b4827e8c3273137
Author: Kuniyuki Iwashima <kuniyu@amazon.com>
Date:   Thu Jan 11 22:36:58 2024 +0000

    af_unix: Save listener for embryo socket.
    
    Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>

diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 9ea04653c7c9..d0c0d81bcb1d 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -82,6 +82,7 @@ struct scm_stat {
 struct unix_sock {
 	/* WARNING: sk has to be the first member */
 	struct sock		sk;
+#define usk_listener		sk.__sk_common.skc_unix_listener
 	struct unix_address	*addr;
 	struct path		path;
 	struct mutex		iolock, bindlock;
diff --git a/include/net/sock.h b/include/net/sock.h
index a9d99a9c583f..3df3d068345a 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -142,6 +142,8 @@ typedef __u64 __bitwise __addrpair;
  *		[union with @skc_incoming_cpu]
  *	@skc_tw_rcv_nxt: (aka tw_rcv_nxt) TCP window next expected seq number
  *		[union with @skc_incoming_cpu]
+ *	@skc_unix_listener: connection request listener socket for AF_UNIX
+ *		[union with @skc_rxhash]
  *	@skc_refcnt: reference count
  *
  *	This is the minimal network layer representation of sockets, the header
@@ -227,6 +229,7 @@ struct sock_common {
 		u32		skc_rxhash;
 		u32		skc_window_clamp;
 		u32		skc_tw_snd_nxt; /* struct tcp_timewait_sock */
+		struct sock	*skc_unix_listener; /* struct unix_sock */
 	};
 	/* public: */
 };
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 5f9871555ec6..4a41bb727c32 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -991,6 +991,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern,
 	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
 	sk->sk_destruct		= unix_sock_destructor;
 	u = unix_sk(sk);
+	u->usk_listener = NULL;
 	u->inflight = 0;
 	u->path.dentry = NULL;
 	u->path.mnt = NULL;
@@ -1612,6 +1613,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
 	newsk->sk_type		= sk->sk_type;
 	init_peercred(newsk);
 	newu = unix_sk(newsk);
+	newu->usk_listener = other;
 	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
 	otheru = unix_sk(other);
 
@@ -1707,8 +1709,8 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
 		       bool kern)
 {
 	struct sock *sk = sock->sk;
-	struct sock *tsk;
 	struct sk_buff *skb;
+	struct sock *tsk;
 	int err;
 
 	err = -EOPNOTSUPP;
@@ -1733,6 +1735,7 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
 	}
 
 	tsk = skb->sk;
+	unix_sk(tsk)->usk_listener = NULL;
 	skb_free_datagram(sk, skb);
 	wake_up_interruptible(&unix_sk(sk)->peer_wait);
 
---8<---


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH 4/4] af_unix: convert to lock_cmp_fn
  2024-01-28 20:56       ` Kuniyuki Iwashima
@ 2024-01-29  1:34         ` Kent Overstreet
  0 siblings, 0 replies; 12+ messages in thread
From: Kent Overstreet @ 2024-01-29  1:34 UTC (permalink / raw)
  To: Kuniyuki Iwashima; +Cc: boqun.feng, linux-fsdevel, linux-kernel, netdev, peterz

On Sun, Jan 28, 2024 at 12:56:32PM -0800, Kuniyuki Iwashima wrote:
> From: Kent Overstreet <kent.overstreet@linux.dev>
> Date: Sun, 28 Jan 2024 14:38:02 -0500
> > On Sun, Jan 28, 2024 at 12:28:38AM -0800, Kuniyuki Iwashima wrote:
> > > From: Kent Overstreet <kent.overstreet@linux.dev>
> > > Date: Fri, 26 Jan 2024 21:08:31 -0500
> > > > Kill
> > > >  - unix_state_lock_nested
> > > >  - _nested usage for net->unx.table.locks[].
> > > > 
> > > > replace both with lock_set_cmp_fn_ptr_order(&u->lock).
> > > > 
> > > > The lock ordering in sk_diag_dump_icons() looks suspicious; this may
> > > > turn up a real issue.
> > > 
> > > Yes, you cannot use lock_cmp_fn() for unix_state_lock_nested().
> > > 
> > > The lock order in sk_diag_dump_icons() is
> > > 
> > >   listening socket -> child socket in the listener's queue
> > > 
> > > , and the inverse order never happens.  ptr comparison does not make
> > > sense in this case, and lockdep will complain about false positive.
> > 
> > Is that a real lock ordering? Is this parent -> child relationship well
> > defined?
> > 
> > If it is, we should be able to write a lock_cmp_fn for it, as long as
> > it's not some handwavy "this will never happen but _nested won't check
> > for it" like I saw elsewhere in the net code... :)
> 
> The problem would be there's no handy way to detect the relationship
> except for iterating the queue again.
> 
> ---8<---
> static int unix_state_lock_cmp_fn(const struct lockdep_map *_a,
> 				  const struct lockdep_map *_b)
> {
> 	const struct unix_sock *a = container_of(_a, struct unix_sock, lock.dep_map);
> 	const struct unix_sock *b = container_of(_b, struct unix_sock, lock.dep_map);
> 
> 	if (a->sk.sk_state == TCP_LISTEN && b->sk.sk_state == TCP_ESTABLISHED) {
> 		/* check if b is a's cihld */
> 	}
> 
> 	/* otherwise, ptr comparison here. */
> }
> ---8<---
> 
> 
> This can be resolved by a patch like this, which is in my local tree
> for another series.
> 
> So, after posting the series, I can revisit this and write lock_cmp_fn
> for u->lock.

Sounds good! Please CC me when you do.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: (subset) [PATCH 1/4] fs/pipe: Convert to lockdep_cmp_fn
  2024-01-27  2:01 ` [PATCH 1/4] fs/pipe: Convert to lockdep_cmp_fn Kent Overstreet
@ 2024-01-29 14:09   ` Christian Brauner
  2024-01-29 15:21   ` Jan Kara
  1 sibling, 0 replies; 12+ messages in thread
From: Christian Brauner @ 2024-01-29 14:09 UTC (permalink / raw)
  To: Kent Overstreet
  Cc: Christian Brauner, peterz, boqun.feng, Alexander Viro, Jan Kara,
	linux-kernel, netdev, linux-fsdevel

On Fri, 26 Jan 2024 21:01:05 -0500, Kent Overstreet wrote:
> *_lock_nested() is fundamentally broken; lockdep needs to check lock
> ordering, but we cannot device a total ordering on an unbounded number
> of elements with only a few subclasses.
> 
> the replacement is to define lock ordering with a proper comparison
> function.
> 
> [...]

Applied to the vfs.misc branch of the vfs/vfs.git tree.
Patches in the vfs.misc branch should appear in linux-next soon.

Please report any outstanding bugs that were missed during review in a
new review to the original patch series allowing us to drop it.

It's encouraged to provide Acked-bys and Reviewed-bys even though the
patch has now been applied. If possible patch trailers will be updated.

Note that commit hashes shown below are subject to change due to rebase,
trailer updates or similar. If in doubt, please check the listed branch.

tree:   https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git
branch: vfs.misc

[1/4] fs/pipe: Convert to lockdep_cmp_fn
      https://git.kernel.org/vfs/vfs/c/38b25beb3b67

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/4] fs/pipe: Convert to lockdep_cmp_fn
  2024-01-27  2:01 ` [PATCH 1/4] fs/pipe: Convert to lockdep_cmp_fn Kent Overstreet
  2024-01-29 14:09   ` (subset) " Christian Brauner
@ 2024-01-29 15:21   ` Jan Kara
  1 sibling, 0 replies; 12+ messages in thread
From: Jan Kara @ 2024-01-29 15:21 UTC (permalink / raw)
  To: Kent Overstreet
  Cc: linux-kernel, netdev, linux-fsdevel, peterz, boqun.feng,
	Alexander Viro, Christian Brauner, Jan Kara

On Fri 26-01-24 21:01:05, Kent Overstreet wrote:
> *_lock_nested() is fundamentally broken; lockdep needs to check lock
> ordering, but we cannot device a total ordering on an unbounded number
> of elements with only a few subclasses.
> 
> the replacement is to define lock ordering with a proper comparison
> function.
> 
> fs/pipe.c was already doing everything correctly otherwise, nothing
> much changes here.
> 
> Cc: Alexander Viro <viro@zeniv.linux.org.uk>
> Cc: Christian Brauner <brauner@kernel.org>
> Cc: Jan Kara <jack@suse.cz>
> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>

Looks good to me. Feel free to add:

Reviewed-by: Jan Kara <jack@suse.cz>

								Honza


> ---
>  fs/pipe.c | 81 +++++++++++++++++++++++++------------------------------
>  1 file changed, 36 insertions(+), 45 deletions(-)
> 
> diff --git a/fs/pipe.c b/fs/pipe.c
> index f1adbfe743d4..50c8a8596b52 100644
> --- a/fs/pipe.c
> +++ b/fs/pipe.c
> @@ -76,18 +76,20 @@ static unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
>   * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
>   */
>  
> -static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass)
> +#define cmp_int(l, r)		((l > r) - (l < r))
> +
> +#ifdef CONFIG_PROVE_LOCKING
> +static int pipe_lock_cmp_fn(const struct lockdep_map *a,
> +			    const struct lockdep_map *b)
>  {
> -	if (pipe->files)
> -		mutex_lock_nested(&pipe->mutex, subclass);
> +	return cmp_int((unsigned long) a, (unsigned long) b);
>  }
> +#endif
>  
>  void pipe_lock(struct pipe_inode_info *pipe)
>  {
> -	/*
> -	 * pipe_lock() nests non-pipe inode locks (for writing to a file)
> -	 */
> -	pipe_lock_nested(pipe, I_MUTEX_PARENT);
> +	if (pipe->files)
> +		mutex_lock(&pipe->mutex);
>  }
>  EXPORT_SYMBOL(pipe_lock);
>  
> @@ -98,28 +100,16 @@ void pipe_unlock(struct pipe_inode_info *pipe)
>  }
>  EXPORT_SYMBOL(pipe_unlock);
>  
> -static inline void __pipe_lock(struct pipe_inode_info *pipe)
> -{
> -	mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
> -}
> -
> -static inline void __pipe_unlock(struct pipe_inode_info *pipe)
> -{
> -	mutex_unlock(&pipe->mutex);
> -}
> -
>  void pipe_double_lock(struct pipe_inode_info *pipe1,
>  		      struct pipe_inode_info *pipe2)
>  {
>  	BUG_ON(pipe1 == pipe2);
>  
> -	if (pipe1 < pipe2) {
> -		pipe_lock_nested(pipe1, I_MUTEX_PARENT);
> -		pipe_lock_nested(pipe2, I_MUTEX_CHILD);
> -	} else {
> -		pipe_lock_nested(pipe2, I_MUTEX_PARENT);
> -		pipe_lock_nested(pipe1, I_MUTEX_CHILD);
> -	}
> +	if (pipe1 > pipe2)
> +		swap(pipe1, pipe2);
> +
> +	pipe_lock(pipe1);
> +	pipe_lock(pipe2);
>  }
>  
>  static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
> @@ -271,7 +261,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
>  		return 0;
>  
>  	ret = 0;
> -	__pipe_lock(pipe);
> +	mutex_lock(&pipe->mutex);
>  
>  	/*
>  	 * We only wake up writers if the pipe was full when we started
> @@ -368,7 +358,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
>  			ret = -EAGAIN;
>  			break;
>  		}
> -		__pipe_unlock(pipe);
> +		mutex_unlock(&pipe->mutex);
>  
>  		/*
>  		 * We only get here if we didn't actually read anything.
> @@ -400,13 +390,13 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
>  		if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0)
>  			return -ERESTARTSYS;
>  
> -		__pipe_lock(pipe);
> +		mutex_lock(&pipe->mutex);
>  		was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
>  		wake_next_reader = true;
>  	}
>  	if (pipe_empty(pipe->head, pipe->tail))
>  		wake_next_reader = false;
> -	__pipe_unlock(pipe);
> +	mutex_unlock(&pipe->mutex);
>  
>  	if (was_full)
>  		wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
> @@ -462,7 +452,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
>  	if (unlikely(total_len == 0))
>  		return 0;
>  
> -	__pipe_lock(pipe);
> +	mutex_lock(&pipe->mutex);
>  
>  	if (!pipe->readers) {
>  		send_sig(SIGPIPE, current, 0);
> @@ -582,19 +572,19 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
>  		 * after waiting we need to re-check whether the pipe
>  		 * become empty while we dropped the lock.
>  		 */
> -		__pipe_unlock(pipe);
> +		mutex_unlock(&pipe->mutex);
>  		if (was_empty)
>  			wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
>  		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
>  		wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe));
> -		__pipe_lock(pipe);
> +		mutex_lock(&pipe->mutex);
>  		was_empty = pipe_empty(pipe->head, pipe->tail);
>  		wake_next_writer = true;
>  	}
>  out:
>  	if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
>  		wake_next_writer = false;
> -	__pipe_unlock(pipe);
> +	mutex_unlock(&pipe->mutex);
>  
>  	/*
>  	 * If we do do a wakeup event, we do a 'sync' wakeup, because we
> @@ -629,7 +619,7 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>  
>  	switch (cmd) {
>  	case FIONREAD:
> -		__pipe_lock(pipe);
> +		mutex_lock(&pipe->mutex);
>  		count = 0;
>  		head = pipe->head;
>  		tail = pipe->tail;
> @@ -639,16 +629,16 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>  			count += pipe->bufs[tail & mask].len;
>  			tail++;
>  		}
> -		__pipe_unlock(pipe);
> +		mutex_unlock(&pipe->mutex);
>  
>  		return put_user(count, (int __user *)arg);
>  
>  #ifdef CONFIG_WATCH_QUEUE
>  	case IOC_WATCH_QUEUE_SET_SIZE: {
>  		int ret;
> -		__pipe_lock(pipe);
> +		mutex_lock(&pipe->mutex);
>  		ret = watch_queue_set_size(pipe, arg);
> -		__pipe_unlock(pipe);
> +		mutex_unlock(&pipe->mutex);
>  		return ret;
>  	}
>  
> @@ -734,7 +724,7 @@ pipe_release(struct inode *inode, struct file *file)
>  {
>  	struct pipe_inode_info *pipe = file->private_data;
>  
> -	__pipe_lock(pipe);
> +	mutex_lock(&pipe->mutex);
>  	if (file->f_mode & FMODE_READ)
>  		pipe->readers--;
>  	if (file->f_mode & FMODE_WRITE)
> @@ -747,7 +737,7 @@ pipe_release(struct inode *inode, struct file *file)
>  		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
>  		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
>  	}
> -	__pipe_unlock(pipe);
> +	mutex_unlock(&pipe->mutex);
>  
>  	put_pipe_info(inode, pipe);
>  	return 0;
> @@ -759,7 +749,7 @@ pipe_fasync(int fd, struct file *filp, int on)
>  	struct pipe_inode_info *pipe = filp->private_data;
>  	int retval = 0;
>  
> -	__pipe_lock(pipe);
> +	mutex_lock(&pipe->mutex);
>  	if (filp->f_mode & FMODE_READ)
>  		retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
>  	if ((filp->f_mode & FMODE_WRITE) && retval >= 0) {
> @@ -768,7 +758,7 @@ pipe_fasync(int fd, struct file *filp, int on)
>  			/* this can happen only if on == T */
>  			fasync_helper(-1, filp, 0, &pipe->fasync_readers);
>  	}
> -	__pipe_unlock(pipe);
> +	mutex_unlock(&pipe->mutex);
>  	return retval;
>  }
>  
> @@ -834,6 +824,7 @@ struct pipe_inode_info *alloc_pipe_info(void)
>  		pipe->nr_accounted = pipe_bufs;
>  		pipe->user = user;
>  		mutex_init(&pipe->mutex);
> +		lock_set_cmp_fn(&pipe->mutex, pipe_lock_cmp_fn, NULL);
>  		return pipe;
>  	}
>  
> @@ -1144,7 +1135,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
>  	filp->private_data = pipe;
>  	/* OK, we have a pipe and it's pinned down */
>  
> -	__pipe_lock(pipe);
> +	mutex_lock(&pipe->mutex);
>  
>  	/* We can only do regular read/write on fifos */
>  	stream_open(inode, filp);
> @@ -1214,7 +1205,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
>  	}
>  
>  	/* Ok! */
> -	__pipe_unlock(pipe);
> +	mutex_unlock(&pipe->mutex);
>  	return 0;
>  
>  err_rd:
> @@ -1230,7 +1221,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
>  	goto err;
>  
>  err:
> -	__pipe_unlock(pipe);
> +	mutex_unlock(&pipe->mutex);
>  
>  	put_pipe_info(inode, pipe);
>  	return ret;
> @@ -1411,7 +1402,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg)
>  	if (!pipe)
>  		return -EBADF;
>  
> -	__pipe_lock(pipe);
> +	mutex_lock(&pipe->mutex);
>  
>  	switch (cmd) {
>  	case F_SETPIPE_SZ:
> @@ -1425,7 +1416,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg)
>  		break;
>  	}
>  
> -	__pipe_unlock(pipe);
> +	mutex_unlock(&pipe->mutex);
>  	return ret;
>  }
>  
> -- 
> 2.43.0
> 
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2024-01-29 15:22 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-01-27  2:01 [PATCH 0/4] lockdep cmp fn conversions Kent Overstreet
2024-01-27  2:01 ` [PATCH 1/4] fs/pipe: Convert to lockdep_cmp_fn Kent Overstreet
2024-01-29 14:09   ` (subset) " Christian Brauner
2024-01-29 15:21   ` Jan Kara
2024-01-27  2:01 ` [PATCH 2/4] pktcdvd: kill mutex_lock_nested() usage Kent Overstreet
2024-01-27  2:01 ` [PATCH 3/4] net: Convert sk->sk_peer_lock to lock_set_cmp_fn_ptr_order() Kent Overstreet
2024-01-27  2:01 ` [PATCH 4/4] af_unix: convert to lock_cmp_fn Kent Overstreet
  -- strict thread matches above, loose matches on Subject: below --
2024-01-27  2:08 [PATCH 0/4] lockdep cmp fn conversions Kent Overstreet
2024-01-27  2:08 ` [PATCH 4/4] af_unix: convert to lock_cmp_fn Kent Overstreet
2024-01-28  8:28   ` Kuniyuki Iwashima
2024-01-28 19:38     ` Kent Overstreet
2024-01-28 20:56       ` Kuniyuki Iwashima
2024-01-29  1:34         ` Kent Overstreet

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).