Linux Trace Kernel
 help / color / mirror / Atom feed
* [PATCH v3 03/12] net: change sock.sk_ino and sock_i_ino() to u64
From: Jeff Layton @ 2026-03-04 15:32 UTC (permalink / raw)
  To: Alexander Viro, Christian Brauner, Jan Kara, Steven Rostedt,
	Masami Hiramatsu, Mathieu Desnoyers, Dan Williams, Eric Biggers,
	Theodore Y. Ts'o, Muchun Song, Oscar Salvador,
	David Hildenbrand, David Howells, Paulo Alcantara, Andreas Dilger,
	Jan Kara, Jaegeuk Kim, Chao Yu, Trond Myklebust, Anna Schumaker,
	Chuck Lever, NeilBrown, Olga Kornievskaia, Dai Ngo, Tom Talpey,
	Steve French, Ronnie Sahlberg, Shyam Prasad N, Bharath SM,
	Alexander Aring, Ryusuke Konishi, Viacheslav Dubeyko,
	Eric Van Hensbergen, Latchesar Ionkov, Dominique Martinet,
	Christian Schoenebeck, David Sterba, Marc Dionne, Ian Kent,
	Luis de Bethencourt, Salah Triki, Tigran A. Aivazian,
	Ilya Dryomov, Alex Markuze, Jan Harkes, coda, Nicolas Pitre,
	Tyler Hicks, Amir Goldstein, Christoph Hellwig,
	John Paul Adrian Glaubitz, Yangtao Li, Mikulas Patocka,
	David Woodhouse, Richard Weinberger, Dave Kleikamp,
	Konstantin Komarov, Mark Fasheh, Joel Becker, Joseph Qi,
	Mike Marshall, Martin Brandenburg, Miklos Szeredi, Anders Larsen,
	Zhihao Cheng, Damien Le Moal, Naohiro Aota, Johannes Thumshirn,
	John Johansen, Paul Moore, James Morris, Serge E. Hallyn,
	Mimi Zohar, Roberto Sassu, Dmitry Kasatkin, Eric Snowberg, Fan Wu,
	Stephen Smalley, Ondrej Mosnacek, Casey Schaufler, Alex Deucher,
	Christian König, David Airlie, Simona Vetter, Sumit Semwal,
	Eric Dumazet, Kuniyuki Iwashima, Paolo Abeni, Willem de Bruijn,
	David S. Miller, Jakub Kicinski, Simon Horman, Oleg Nesterov,
	Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Ian Rogers, Adrian Hunter, James Clark, Darrick J. Wong,
	Martin Schiller, Eric Paris, Joerg Reuter, Marcel Holtmann,
	Johan Hedberg, Luiz Augusto von Dentz, Oliver Hartkopp,
	Marc Kleine-Budde, David Ahern, Neal Cardwell, Steffen Klassert,
	Herbert Xu, Remi Denis-Courmont, Marcelo Ricardo Leitner,
	Xin Long, Magnus Karlsson, Maciej Fijalkowski, Stanislav Fomichev,
	Alexei Starovoitov, Daniel Borkmann, Jesper Dangaard Brouer,
	John Fastabend
  Cc: linux-fsdevel, linux-kernel, linux-trace-kernel, nvdimm, fsverity,
	linux-mm, netfs, linux-ext4, linux-f2fs-devel, linux-nfs,
	linux-cifs, samba-technical, linux-nilfs, v9fs, linux-afs, autofs,
	ceph-devel, codalist, ecryptfs, linux-mtd, jfs-discussion, ntfs3,
	ocfs2-devel, devel, linux-unionfs, apparmor,
	linux-security-module, linux-integrity, selinux, amd-gfx,
	dri-devel, linux-media, linaro-mm-sig, netdev, linux-perf-users,
	linux-fscrypt, linux-xfs, linux-hams, linux-x25, audit,
	linux-bluetooth, linux-can, linux-sctp, bpf, Jeff Layton
In-Reply-To: <20260304-iino-u64-v3-0-2257ad83d372@kernel.org>

inode->i_ino is being converted to a u64. sock.sk_ino (which caches the
inode number) must also be widened to avoid truncation on 32-bit
architectures where unsigned long is only 32 bits.

Change sk_ino from unsigned long to u64, and update the return type
of sock_i_ino() to match. Fix all format strings that print the
result of sock_i_ino() (%lu -> %llu), and widen the intermediate
variables and function parameters in the diag modules that were
using int to hold the inode number.

Note that the UAPI socket diag structures (inet_diag_msg.idiag_inode,
unix_diag_msg.udiag_ino, etc.) are all __u32 and cannot be changed
without breaking the ABI. The assignments to those fields will
silently truncate, which is the existing behavior.

Acked-by: Marc Kleine-Budde <mkl@pengutronix.de> # for net/can
Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 include/net/sock.h           | 4 ++--
 net/ax25/af_ax25.c           | 2 +-
 net/bluetooth/af_bluetooth.c | 4 ++--
 net/can/bcm.c                | 2 +-
 net/ipv4/ping.c              | 2 +-
 net/ipv4/raw.c               | 2 +-
 net/ipv4/tcp_ipv4.c          | 2 +-
 net/ipv4/udp.c               | 2 +-
 net/ipv6/datagram.c          | 2 +-
 net/ipv6/tcp_ipv6.c          | 2 +-
 net/key/af_key.c             | 2 +-
 net/netlink/af_netlink.c     | 2 +-
 net/netlink/diag.c           | 2 +-
 net/packet/af_packet.c       | 2 +-
 net/packet/diag.c            | 2 +-
 net/phonet/socket.c          | 4 ++--
 net/sctp/proc.c              | 4 ++--
 net/unix/af_unix.c           | 2 +-
 net/unix/diag.c              | 6 +++---
 net/xdp/xsk_diag.c           | 2 +-
 20 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index 6c9a83016e9551ed2e2a0d7edf32300b8a4327e7..cfae4fefb8f55d8be6ff5ef401f7b9c601f67cc3 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -537,7 +537,7 @@ struct sock {
 	rwlock_t		sk_callback_lock;
 	u32			sk_ack_backlog;
 	u32			sk_max_ack_backlog;
-	unsigned long		sk_ino;
+	u64			sk_ino;
 	spinlock_t		sk_peer_lock;
 	int			sk_bind_phc;
 	struct pid		*sk_peer_pid;
@@ -2140,7 +2140,7 @@ static inline void sock_graft(struct sock *sk, struct socket *parent)
 	write_unlock_bh(&sk->sk_callback_lock);
 }
 
-static inline unsigned long sock_i_ino(const struct sock *sk)
+static inline u64 sock_i_ino(const struct sock *sk)
 {
 	/* Paired with WRITE_ONCE() in sock_graft() and sock_orphan() */
 	return READ_ONCE(sk->sk_ino);
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index a76f4793aed27657dea22e9e28c1d3cd45087cb2..9d236e64f5f59abe2fd974ffd9d6ef1193db5b52 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1986,7 +1986,7 @@ static int ax25_info_show(struct seq_file *seq, void *v)
 		   ax25->paclen);
 
 	if (ax25->sk != NULL) {
-		seq_printf(seq, " %d %d %lu\n",
+		seq_printf(seq, " %d %d %llu\n",
 			   sk_wmem_alloc_get(ax25->sk),
 			   sk_rmem_alloc_get(ax25->sk),
 			   sock_i_ino(ax25->sk));
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 2b94e20772038f1d90228d214f57ab334ca01a50..33d053d634072fee79bb62c003980c203fb30111 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -817,14 +817,14 @@ static int bt_seq_show(struct seq_file *seq, void *v)
 		struct bt_sock *bt = bt_sk(sk);
 
 		seq_printf(seq,
-			   "%pK %-6d %-6u %-6u %-6u %-6lu %-6lu",
+			   "%pK %-6d %-6u %-6u %-6u %-6llu %-6llu",
 			   sk,
 			   refcount_read(&sk->sk_refcnt),
 			   sk_rmem_alloc_get(sk),
 			   sk_wmem_alloc_get(sk),
 			   from_kuid(seq_user_ns(seq), sk_uid(sk)),
 			   sock_i_ino(sk),
-			   bt->parent ? sock_i_ino(bt->parent) : 0LU);
+			   bt->parent ? sock_i_ino(bt->parent) : 0ULL);
 
 		if (l->custom_seq_show) {
 			seq_putc(seq, ' ');
diff --git a/net/can/bcm.c b/net/can/bcm.c
index b7324e9c955b7f03a8f032676bbd89aa8e06bded..30aac2f903d57421d2761153939200813b99e0e9 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1712,7 +1712,7 @@ static int bcm_connect(struct socket *sock, struct sockaddr_unsized *uaddr, int
 #if IS_ENABLED(CONFIG_PROC_FS)
 	if (net->can.bcmproc_dir) {
 		/* unique socket address as filename */
-		sprintf(bo->procname, "%lu", sock_i_ino(sk));
+		sprintf(bo->procname, "%llu", sock_i_ino(sk));
 		bo->bcm_proc_read = proc_create_net_single(bo->procname, 0644,
 						     net->can.bcmproc_dir,
 						     bcm_proc_show, sk);
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 71d5e17719debb14ca2400edcf5a2cc3153291c6..bc4b43e5230336521802403c5c74cfb5da8b05b1 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -1111,7 +1111,7 @@ static void ping_v4_format_sock(struct sock *sp, struct seq_file *f,
 	__u16 srcp = ntohs(inet->inet_sport);
 
 	seq_printf(f, "%5d: %08X:%04X %08X:%04X"
-		" %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %u",
+		" %02X %08X:%08X %02X:%08lX %08X %5u %8d %llu %d %pK %u",
 		bucket, src, srcp, dest, destp, sp->sk_state,
 		sk_wmem_alloc_get(sp),
 		sk_rmem_alloc_get(sp),
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index e20c41206e29d5655f5378f08fb283f2ecbd2097..bcc99ced1ade135701e2df0e32da0da0af2fc487 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -1041,7 +1041,7 @@ static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i)
 	      srcp  = inet->inet_num;
 
 	seq_printf(seq, "%4d: %08X:%04X %08X:%04X"
-		" %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %u\n",
+		" %02X %08X:%08X %02X:%08lX %08X %5u %8d %llu %d %pK %u\n",
 		i, src, srcp, dest, destp, sp->sk_state,
 		sk_wmem_alloc_get(sp),
 		sk_rmem_alloc_get(sp),
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index d53d39be291a5750af3ab2a160b35f0f8a28ff9d..f2ef41583fc195e153ed848afaf3391080696f02 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2889,7 +2889,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
 				      READ_ONCE(tp->copied_seq), 0);
 
 	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
-			"%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
+			"%08X %5u %8d %llu %d %pK %lu %lu %u %u %d",
 		i, src, srcp, dest, destp, state,
 		READ_ONCE(tp->write_seq) - tp->snd_una,
 		rx_queue,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 6c6b68a66dcd3b3d8f1747fead868c195e04a0a9..db58bf786c4a8c73b5d42851bb68bbb22a5e85ab 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -3428,7 +3428,7 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f,
 	__u16 srcp	  = ntohs(inet->inet_sport);
 
 	seq_printf(f, "%5d: %08X:%04X %08X:%04X"
-		" %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %u",
+		" %02X %08X:%08X %02X:%08lX %08X %5u %8d %llu %d %pK %u",
 		bucket, src, srcp, dest, destp, sp->sk_state,
 		sk_wmem_alloc_get(sp),
 		udp_rqueue_get(sp),
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index c564b68a056268c7cbc81b5f29f60289ea9e09eb..611fddb90c79e94cc7dfcf85b343bcbf630c39d4 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -1055,7 +1055,7 @@ void __ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp,
 	src   = &sp->sk_v6_rcv_saddr;
 	seq_printf(seq,
 		   "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
-		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %u\n",
+		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %llu %d %pK %u\n",
 		   bucket,
 		   src->s6_addr32[0], src->s6_addr32[1],
 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index e46a0efae01235ae7430ed268b92cb47309b8d28..379c0935f4030b9b3432e867c357883df3089f74 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2175,7 +2175,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
 
 	seq_printf(seq,
 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
-		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
+		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %llu %d %pK %lu %lu %u %u %d\n",
 		   i,
 		   src->s6_addr32[0], src->s6_addr32[1],
 		   src->s6_addr32[2], src->s6_addr32[3], srcp,
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 0756bac62f7c042851636badf0a5e961c4e673c1..522308ec934ed055bdce4fa672b4e6a8bb67edc7 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -3783,7 +3783,7 @@ static int pfkey_seq_show(struct seq_file *f, void *v)
 	if (v == SEQ_START_TOKEN)
 		seq_printf(f ,"sk       RefCnt Rmem   Wmem   User   Inode\n");
 	else
-		seq_printf(f, "%pK %-6d %-6u %-6u %-6u %-6lu\n",
+		seq_printf(f, "%pK %-6d %-6u %-6u %-6u %-6llu\n",
 			       s,
 			       refcount_read(&s->sk_refcnt),
 			       sk_rmem_alloc_get(s),
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 4d609d5cf40653e04de60f2d28ee26b8bdcdc2ed..aba847902be5ace66e17abf0236a5eeebfee0739 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2700,7 +2700,7 @@ static int netlink_native_seq_show(struct seq_file *seq, void *v)
 		struct sock *s = v;
 		struct netlink_sock *nlk = nlk_sk(s);
 
-		seq_printf(seq, "%pK %-3d %-10u %08x %-8d %-8d %-5d %-8d %-8u %-8lu\n",
+		seq_printf(seq, "%pK %-3d %-10u %08x %-8d %-8d %-5d %-8d %-8u %-8llu\n",
 			   s,
 			   s->sk_protocol,
 			   nlk->portid,
diff --git a/net/netlink/diag.c b/net/netlink/diag.c
index 1dfc340736b832459388304003ff1f5e1e481d23..0b3e021bd0ed29edc094fad2c79c7c067edcdd50 100644
--- a/net/netlink/diag.c
+++ b/net/netlink/diag.c
@@ -43,7 +43,7 @@ static int sk_diag_put_flags(struct sock *sk, struct sk_buff *skb)
 
 static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
 			struct netlink_diag_req *req,
-			u32 portid, u32 seq, u32 flags, int sk_ino)
+			u32 portid, u32 seq, u32 flags, u64 sk_ino)
 {
 	struct nlmsghdr *nlh;
 	struct netlink_diag_msg *rep;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 72d0935139f0f5feb00c051143fb47f45fd1f94d..f2af2e0a8530a982d7b95b91ac0388f67b16d46f 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -4721,7 +4721,7 @@ static int packet_seq_show(struct seq_file *seq, void *v)
 		const struct packet_sock *po = pkt_sk(s);
 
 		seq_printf(seq,
-			   "%pK %-6d %-4d %04x   %-5d %1d %-6u %-6u %-6lu\n",
+			   "%pK %-6d %-4d %04x   %-5d %1d %-6u %-6u %-6llu\n",
 			   s,
 			   refcount_read(&s->sk_refcnt),
 			   s->sk_type,
diff --git a/net/packet/diag.c b/net/packet/diag.c
index c8f43e0c1925fab8ef6c39de3547dcd6f7389b81..cee773f46571ca51718544227f3425192bb77f24 100644
--- a/net/packet/diag.c
+++ b/net/packet/diag.c
@@ -130,7 +130,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
 			struct packet_diag_req *req,
 			bool may_report_filterinfo,
 			struct user_namespace *user_ns,
-			u32 portid, u32 seq, u32 flags, int sk_ino)
+			u32 portid, u32 seq, u32 flags, u64 sk_ino)
 {
 	struct nlmsghdr *nlh;
 	struct packet_diag_msg *rp;
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index 4423d483c630adc93af07e0c23a78800b696d4a4..c4af26357144ebb8016d001244c1fdaf925892fc 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -579,7 +579,7 @@ static int pn_sock_seq_show(struct seq_file *seq, void *v)
 		struct sock *sk = v;
 		struct pn_sock *pn = pn_sk(sk);
 
-		seq_printf(seq, "%2d %04X:%04X:%02X %02X %08X:%08X %5d %lu "
+		seq_printf(seq, "%2d %04X:%04X:%02X %02X %08X:%08X %5d %llu "
 			"%d %pK %u",
 			sk->sk_protocol, pn->sobject, pn->dobject,
 			pn->resource, sk->sk_state,
@@ -754,7 +754,7 @@ static int pn_res_seq_show(struct seq_file *seq, void *v)
 		struct sock *sk = rcu_dereference_protected(*psk,
 					lockdep_is_held(&resource_mutex));
 
-		seq_printf(seq, "%02X %5u %lu",
+		seq_printf(seq, "%02X %5u %llu",
 			   (int) (psk - pnres.sk),
 			   from_kuid_munged(seq_user_ns(seq), sk_uid(sk)),
 			   sock_i_ino(sk));
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 1ed281f3c355d559d82e4fa2be10c191f787c85e..43433d7e2acd706ede7b59a51ab6441d1ee1dd12 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -174,7 +174,7 @@ static int sctp_eps_seq_show(struct seq_file *seq, void *v)
 		sk = ep->base.sk;
 		if (!net_eq(sock_net(sk), seq_file_net(seq)))
 			continue;
-		seq_printf(seq, "%8pK %8pK %-3d %-3d %-4d %-5d %5u %5lu ", ep, sk,
+		seq_printf(seq, "%8pK %8pK %-3d %-3d %-4d %-5d %5u %5llu ", ep, sk,
 			   sctp_sk(sk)->type, sk->sk_state, hash,
 			   ep->base.bind_addr.port,
 			   from_kuid_munged(seq_user_ns(seq), sk_uid(sk)),
@@ -261,7 +261,7 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
 
 	seq_printf(seq,
 		   "%8pK %8pK %-3d %-3d %-2d %-4d "
-		   "%4d %8d %8d %7u %5lu %-5d %5d ",
+		   "%4d %8d %8d %7u %5llu %-5d %5d ",
 		   assoc, sk, sctp_sk(sk)->type, sk->sk_state,
 		   assoc->state, 0,
 		   assoc->assoc_id,
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 3756a93dc63aabc814f7edd31e8c0b6fafa02ce7..a6c5015f3f0a19a930da636ef37eb98b3c40663f 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -3537,7 +3537,7 @@ static int unix_seq_show(struct seq_file *seq, void *v)
 		struct unix_sock *u = unix_sk(s);
 		unix_state_lock(s);
 
-		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
+		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5llu",
 			s,
 			refcount_read(&s->sk_refcnt),
 			0,
diff --git a/net/unix/diag.c b/net/unix/diag.c
index ca34730261510c2b34dc6661eadaa9d1651e59d2..410f6c8745b7a0e5287b9f7fbc0a71d15cffc850 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -45,7 +45,7 @@ static int sk_diag_dump_vfs(struct sock *sk, struct sk_buff *nlskb)
 static int sk_diag_dump_peer(struct sock *sk, struct sk_buff *nlskb)
 {
 	struct sock *peer;
-	int ino;
+	u64 ino;
 
 	peer = unix_peer_get(sk);
 	if (peer) {
@@ -112,7 +112,7 @@ static int sk_diag_dump_uid(struct sock *sk, struct sk_buff *nlskb,
 
 static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req,
 			struct user_namespace *user_ns,
-			u32 portid, u32 seq, u32 flags, int sk_ino)
+			u32 portid, u32 seq, u32 flags, u64 sk_ino)
 {
 	struct nlmsghdr *nlh;
 	struct unix_diag_msg *rep;
@@ -186,7 +186,7 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 		num = 0;
 		spin_lock(&net->unx.table.locks[slot]);
 		sk_for_each(sk, &net->unx.table.buckets[slot]) {
-			int sk_ino;
+			u64 sk_ino;
 
 			if (num < s_num)
 				goto next;
diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c
index 0e0bca031c0399901949982bf430ade6cde286d3..0170363eb542cc1874817f46fd3627857761ab77 100644
--- a/net/xdp/xsk_diag.c
+++ b/net/xdp/xsk_diag.c
@@ -92,7 +92,7 @@ static int xsk_diag_put_stats(const struct xdp_sock *xs, struct sk_buff *nlskb)
 static int xsk_diag_fill(struct sock *sk, struct sk_buff *nlskb,
 			 struct xdp_diag_req *req,
 			 struct user_namespace *user_ns,
-			 u32 portid, u32 seq, u32 flags, int sk_ino)
+			 u32 portid, u32 seq, u32 flags, u64 sk_ino)
 {
 	struct xdp_sock *xs = xdp_sk(sk);
 	struct xdp_diag_msg *msg;

-- 
2.53.0


^ permalink raw reply related

* [PATCH v3 02/12] audit: widen ino fields to u64
From: Jeff Layton @ 2026-03-04 15:32 UTC (permalink / raw)
  To: Alexander Viro, Christian Brauner, Jan Kara, Steven Rostedt,
	Masami Hiramatsu, Mathieu Desnoyers, Dan Williams, Eric Biggers,
	Theodore Y. Ts'o, Muchun Song, Oscar Salvador,
	David Hildenbrand, David Howells, Paulo Alcantara, Andreas Dilger,
	Jan Kara, Jaegeuk Kim, Chao Yu, Trond Myklebust, Anna Schumaker,
	Chuck Lever, NeilBrown, Olga Kornievskaia, Dai Ngo, Tom Talpey,
	Steve French, Ronnie Sahlberg, Shyam Prasad N, Bharath SM,
	Alexander Aring, Ryusuke Konishi, Viacheslav Dubeyko,
	Eric Van Hensbergen, Latchesar Ionkov, Dominique Martinet,
	Christian Schoenebeck, David Sterba, Marc Dionne, Ian Kent,
	Luis de Bethencourt, Salah Triki, Tigran A. Aivazian,
	Ilya Dryomov, Alex Markuze, Jan Harkes, coda, Nicolas Pitre,
	Tyler Hicks, Amir Goldstein, Christoph Hellwig,
	John Paul Adrian Glaubitz, Yangtao Li, Mikulas Patocka,
	David Woodhouse, Richard Weinberger, Dave Kleikamp,
	Konstantin Komarov, Mark Fasheh, Joel Becker, Joseph Qi,
	Mike Marshall, Martin Brandenburg, Miklos Szeredi, Anders Larsen,
	Zhihao Cheng, Damien Le Moal, Naohiro Aota, Johannes Thumshirn,
	John Johansen, Paul Moore, James Morris, Serge E. Hallyn,
	Mimi Zohar, Roberto Sassu, Dmitry Kasatkin, Eric Snowberg, Fan Wu,
	Stephen Smalley, Ondrej Mosnacek, Casey Schaufler, Alex Deucher,
	Christian König, David Airlie, Simona Vetter, Sumit Semwal,
	Eric Dumazet, Kuniyuki Iwashima, Paolo Abeni, Willem de Bruijn,
	David S. Miller, Jakub Kicinski, Simon Horman, Oleg Nesterov,
	Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Ian Rogers, Adrian Hunter, James Clark, Darrick J. Wong,
	Martin Schiller, Eric Paris, Joerg Reuter, Marcel Holtmann,
	Johan Hedberg, Luiz Augusto von Dentz, Oliver Hartkopp,
	Marc Kleine-Budde, David Ahern, Neal Cardwell, Steffen Klassert,
	Herbert Xu, Remi Denis-Courmont, Marcelo Ricardo Leitner,
	Xin Long, Magnus Karlsson, Maciej Fijalkowski, Stanislav Fomichev,
	Alexei Starovoitov, Daniel Borkmann, Jesper Dangaard Brouer,
	John Fastabend
  Cc: linux-fsdevel, linux-kernel, linux-trace-kernel, nvdimm, fsverity,
	linux-mm, netfs, linux-ext4, linux-f2fs-devel, linux-nfs,
	linux-cifs, samba-technical, linux-nilfs, v9fs, linux-afs, autofs,
	ceph-devel, codalist, ecryptfs, linux-mtd, jfs-discussion, ntfs3,
	ocfs2-devel, devel, linux-unionfs, apparmor,
	linux-security-module, linux-integrity, selinux, amd-gfx,
	dri-devel, linux-media, linaro-mm-sig, netdev, linux-perf-users,
	linux-fscrypt, linux-xfs, linux-hams, linux-x25, audit,
	linux-bluetooth, linux-can, linux-sctp, bpf, Jeff Layton
In-Reply-To: <20260304-iino-u64-v3-0-2257ad83d372@kernel.org>

inode->i_ino is being widened from unsigned long to u64. The audit
subsystem uses unsigned long ino in struct fields, function parameters,
and local variables that store inode numbers from arbitrary filesystems.
On 32-bit platforms this truncates inode numbers that exceed 32 bits,
which will cause incorrect audit log entries and broken watch/mark
comparisons.

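Widen all audit ino fields, parameters, and locals to u64, and update
the inode format string from %lu to %llu to match. audit_hash_ino() now
takes a u64 and truncates to u32 internally, so the explicit (u32) casts
at its call sites are dropped.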

Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 include/linux/audit.h   |  2 +-
 kernel/audit.h          | 13 ++++++-------
 kernel/audit_fsnotify.c |  4 ++--
 kernel/audit_watch.c    | 12 ++++++------
 kernel/auditsc.c        |  4 ++--
 5 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/include/linux/audit.h b/include/linux/audit.h
index b642b5faca654c8465b6839c32b633426e1d3d9a..b915aaa7ed7399a6e453b1bb9bacbda686028638 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -15,7 +15,7 @@
 #include <uapi/linux/audit.h>
 #include <uapi/linux/fanotify.h>
 
-#define AUDIT_INO_UNSET ((unsigned long)-1)
+#define AUDIT_INO_UNSET ((u64)-1)
 #define AUDIT_DEV_UNSET ((dev_t)-1)
 
 struct audit_sig_info {
diff --git a/kernel/audit.h b/kernel/audit.h
index 7c401729e21bbcb062e2d5f3059d4496ed83529b..ac81fa02bcd7501e31461a346c4e599841525001 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -76,7 +76,7 @@ struct audit_names {
 	int			name_len;	/* number of chars to log */
 	bool			hidden;		/* don't log this record */
 
-	unsigned long		ino;
+	u64			ino;
 	dev_t			dev;
 	umode_t			mode;
 	kuid_t			uid;
@@ -225,9 +225,9 @@ extern int auditd_test_task(struct task_struct *task);
 #define AUDIT_INODE_BUCKETS	32
 extern struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS];
 
-static inline int audit_hash_ino(u32 ino)
+static inline int audit_hash_ino(u64 ino)
 {
-	return (ino & (AUDIT_INODE_BUCKETS-1));
+	return ((u32)ino & (AUDIT_INODE_BUCKETS-1));
 }
 
 /* Indicates that audit should log the full pathname. */
@@ -277,16 +277,15 @@ extern int audit_to_watch(struct audit_krule *krule, char *path, int len,
 extern int audit_add_watch(struct audit_krule *krule, struct list_head **list);
 extern void audit_remove_watch_rule(struct audit_krule *krule);
 extern char *audit_watch_path(struct audit_watch *watch);
-extern int audit_watch_compare(struct audit_watch *watch, unsigned long ino,
-			       dev_t dev);
+extern int audit_watch_compare(struct audit_watch *watch, u64 ino, dev_t dev);
 
 extern struct audit_fsnotify_mark *audit_alloc_mark(struct audit_krule *krule,
 						    char *pathname, int len);
 extern char *audit_mark_path(struct audit_fsnotify_mark *mark);
 extern void audit_remove_mark(struct audit_fsnotify_mark *audit_mark);
 extern void audit_remove_mark_rule(struct audit_krule *krule);
-extern int audit_mark_compare(struct audit_fsnotify_mark *mark,
-			      unsigned long ino, dev_t dev);
+extern int audit_mark_compare(struct audit_fsnotify_mark *mark, u64 ino,
+			      dev_t dev);
 extern int audit_dupe_exe(struct audit_krule *new, struct audit_krule *old);
 extern int audit_exe_compare(struct task_struct *tsk,
 			     struct audit_fsnotify_mark *mark);
diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c
index a4401f6510608119fd928944c36103326475e3b2..711454f9f7242847f78e7eeed92db7a66be265e6 100644
--- a/kernel/audit_fsnotify.c
+++ b/kernel/audit_fsnotify.c
@@ -25,7 +25,7 @@
  */
 struct audit_fsnotify_mark {
 	dev_t dev;		/* associated superblock device */
-	unsigned long ino;	/* associated inode number */
+	u64 ino;		/* associated inode number */
 	char *path;		/* insertion path */
 	struct fsnotify_mark mark; /* fsnotify mark on the inode */
 	struct audit_krule *rule;
@@ -57,7 +57,7 @@ char *audit_mark_path(struct audit_fsnotify_mark *mark)
 	return mark->path;
 }
 
-int audit_mark_compare(struct audit_fsnotify_mark *mark, unsigned long ino, dev_t dev)
+int audit_mark_compare(struct audit_fsnotify_mark *mark, u64 ino, dev_t dev)
 {
 	if (mark->ino == AUDIT_INO_UNSET)
 		return 0;
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index 096faac2435ce2b3741fb1f623ea7fab65ae7a07..33577f0f54eff1eafe48a94dd2839b00fe7dffcc 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -37,7 +37,7 @@ struct audit_watch {
 	refcount_t		count;	/* reference count */
 	dev_t			dev;	/* associated superblock device */
 	char			*path;	/* insertion path */
-	unsigned long		ino;	/* associated inode number */
+	u64			ino;	/* associated inode number */
 	struct audit_parent	*parent; /* associated parent */
 	struct list_head	wlist;	/* entry in parent->watches list */
 	struct list_head	rules;	/* anchor for krule->rlist */
@@ -125,7 +125,7 @@ char *audit_watch_path(struct audit_watch *watch)
 	return watch->path;
 }
 
-int audit_watch_compare(struct audit_watch *watch, unsigned long ino, dev_t dev)
+int audit_watch_compare(struct audit_watch *watch, u64 ino, dev_t dev)
 {
 	return (watch->ino != AUDIT_INO_UNSET) &&
 		(watch->ino == ino) &&
@@ -244,7 +244,7 @@ static void audit_watch_log_rule_change(struct audit_krule *r, struct audit_watc
 /* Update inode info in audit rules based on filesystem event. */
 static void audit_update_watch(struct audit_parent *parent,
 			       const struct qstr *dname, dev_t dev,
-			       unsigned long ino, unsigned invalidating)
+			       u64 ino, unsigned invalidating)
 {
 	struct audit_watch *owatch, *nwatch, *nextw;
 	struct audit_krule *r, *nextr;
@@ -285,7 +285,7 @@ static void audit_update_watch(struct audit_parent *parent,
 				list_del(&oentry->rule.list);
 				audit_panic("error updating watch, removing");
 			} else {
-				int h = audit_hash_ino((u32)ino);
+				int h = audit_hash_ino(ino);
 
 				/*
 				 * nentry->rule.watch == oentry->rule.watch so
@@ -439,7 +439,7 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list)
 
 	audit_add_to_parent(krule, parent);
 
-	h = audit_hash_ino((u32)watch->ino);
+	h = audit_hash_ino(watch->ino);
 	*list = &audit_inode_hash[h];
 error:
 	path_put(&parent_path);
@@ -527,7 +527,7 @@ int audit_dupe_exe(struct audit_krule *new, struct audit_krule *old)
 int audit_exe_compare(struct task_struct *tsk, struct audit_fsnotify_mark *mark)
 {
 	struct file *exe_file;
-	unsigned long ino;
+	u64 ino;
 	dev_t dev;
 
 	/* only do exe filtering if we are recording @current events/records */
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index f6af6a8f68c4f6d14d9a899934138df2036e1f9a..ab54fccba215ca61d56335d1a22a7f26297e28ee 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -886,7 +886,7 @@ static int audit_filter_inode_name(struct task_struct *tsk,
 				   struct audit_names *n,
 				   struct audit_context *ctx)
 {
-	int h = audit_hash_ino((u32)n->ino);
+	int h = audit_hash_ino(n->ino);
 	struct list_head *list = &audit_inode_hash[h];
 
 	return __audit_filter_op(tsk, ctx, list, n, ctx->major);
@@ -1534,7 +1534,7 @@ static void audit_log_name(struct audit_context *context, struct audit_names *n,
 		audit_log_format(ab, " name=(null)");
 
 	if (n->ino != AUDIT_INO_UNSET)
-		audit_log_format(ab, " inode=%lu dev=%02x:%02x mode=%#ho ouid=%u ogid=%u rdev=%02x:%02x",
+		audit_log_format(ab, " inode=%llu dev=%02x:%02x mode=%#ho ouid=%u ogid=%u rdev=%02x:%02x",
 				 n->ino,
 				 MAJOR(n->dev),
 				 MINOR(n->dev),

-- 
2.53.0


^ permalink raw reply related

* [PATCH v3 01/12] vfs: widen inode hash/lookup functions to u64
From: Jeff Layton @ 2026-03-04 15:32 UTC (permalink / raw)
  To: Alexander Viro, Christian Brauner, Jan Kara, Steven Rostedt,
	Masami Hiramatsu, Mathieu Desnoyers, Dan Williams, Eric Biggers,
	Theodore Y. Ts'o, Muchun Song, Oscar Salvador,
	David Hildenbrand, David Howells, Paulo Alcantara, Andreas Dilger,
	Jan Kara, Jaegeuk Kim, Chao Yu, Trond Myklebust, Anna Schumaker,
	Chuck Lever, NeilBrown, Olga Kornievskaia, Dai Ngo, Tom Talpey,
	Steve French, Ronnie Sahlberg, Shyam Prasad N, Bharath SM,
	Alexander Aring, Ryusuke Konishi, Viacheslav Dubeyko,
	Eric Van Hensbergen, Latchesar Ionkov, Dominique Martinet,
	Christian Schoenebeck, David Sterba, Marc Dionne, Ian Kent,
	Luis de Bethencourt, Salah Triki, Tigran A. Aivazian,
	Ilya Dryomov, Alex Markuze, Jan Harkes, coda, Nicolas Pitre,
	Tyler Hicks, Amir Goldstein, Christoph Hellwig,
	John Paul Adrian Glaubitz, Yangtao Li, Mikulas Patocka,
	David Woodhouse, Richard Weinberger, Dave Kleikamp,
	Konstantin Komarov, Mark Fasheh, Joel Becker, Joseph Qi,
	Mike Marshall, Martin Brandenburg, Miklos Szeredi, Anders Larsen,
	Zhihao Cheng, Damien Le Moal, Naohiro Aota, Johannes Thumshirn,
	John Johansen, Paul Moore, James Morris, Serge E. Hallyn,
	Mimi Zohar, Roberto Sassu, Dmitry Kasatkin, Eric Snowberg, Fan Wu,
	Stephen Smalley, Ondrej Mosnacek, Casey Schaufler, Alex Deucher,
	Christian König, David Airlie, Simona Vetter, Sumit Semwal,
	Eric Dumazet, Kuniyuki Iwashima, Paolo Abeni, Willem de Bruijn,
	David S. Miller, Jakub Kicinski, Simon Horman, Oleg Nesterov,
	Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Ian Rogers, Adrian Hunter, James Clark, Darrick J. Wong,
	Martin Schiller, Eric Paris, Joerg Reuter, Marcel Holtmann,
	Johan Hedberg, Luiz Augusto von Dentz, Oliver Hartkopp,
	Marc Kleine-Budde, David Ahern, Neal Cardwell, Steffen Klassert,
	Herbert Xu, Remi Denis-Courmont, Marcelo Ricardo Leitner,
	Xin Long, Magnus Karlsson, Maciej Fijalkowski, Stanislav Fomichev,
	Alexei Starovoitov, Daniel Borkmann, Jesper Dangaard Brouer,
	John Fastabend
  Cc: linux-fsdevel, linux-kernel, linux-trace-kernel, nvdimm, fsverity,
	linux-mm, netfs, linux-ext4, linux-f2fs-devel, linux-nfs,
	linux-cifs, samba-technical, linux-nilfs, v9fs, linux-afs, autofs,
	ceph-devel, codalist, ecryptfs, linux-mtd, jfs-discussion, ntfs3,
	ocfs2-devel, devel, linux-unionfs, apparmor,
	linux-security-module, linux-integrity, selinux, amd-gfx,
	dri-devel, linux-media, linaro-mm-sig, netdev, linux-perf-users,
	linux-fscrypt, linux-xfs, linux-hams, linux-x25, audit,
	linux-bluetooth, linux-can, linux-sctp, bpf, Jeff Layton
In-Reply-To: <20260304-iino-u64-v3-0-2257ad83d372@kernel.org>

Change the inode hash/lookup VFS API functions to accept u64 parameters
instead of unsigned long for inode numbers and hash values. This is
preparation for widening i_ino itself to u64, which will allow
filesystems to store full 64-bit inode numbers on 32-bit architectures.

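Since unsigned long implicitly widens to u64 on all architectures, this
change is backward-compatible with all existing callers that pass inode
numbers or hash values by value. Callback functions whose signature
includes the inode number (such as f2fs_match_ino(), updated here) must
change their parameter type to match.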

Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/f2fs/node.c     |  2 +-
 fs/inode.c         | 36 ++++++++++++++++++------------------
 include/linux/fs.h | 26 +++++++++++++-------------
 3 files changed, 32 insertions(+), 32 deletions(-)

diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 2030e943ab9b3d5e2deb20efe9a44cf5093a61fb..d8d02870cfd1c22cf1951201361519de534b6bf7 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1997,7 +1997,7 @@ int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
 	return ret;
 }
 
-static int f2fs_match_ino(struct inode *inode, unsigned long ino, void *data)
+static int f2fs_match_ino(struct inode *inode, u64 ino, void *data)
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	bool clean;
diff --git a/fs/inode.c b/fs/inode.c
index cc12b68e021b2c97cc88a46ddc736334ecb8edfa..62df5dda05894297dde05e541e4c8550bd866fef 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -672,7 +672,7 @@ static inline void inode_sb_list_del(struct inode *inode)
 	}
 }
 
-static unsigned long hash(struct super_block *sb, unsigned long hashval)
+static unsigned long hash(struct super_block *sb, u64 hashval)
 {
 	unsigned long tmp;
 
@@ -685,12 +685,12 @@ static unsigned long hash(struct super_block *sb, unsigned long hashval)
 /**
  *	__insert_inode_hash - hash an inode
  *	@inode: unhashed inode
- *	@hashval: unsigned long value used to locate this object in the
+ *	@hashval: u64 value used to locate this object in the
  *		inode_hashtable.
  *
  *	Add an inode to the inode hash for this superblock.
  */
-void __insert_inode_hash(struct inode *inode, unsigned long hashval)
+void __insert_inode_hash(struct inode *inode, u64 hashval)
 {
 	struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval);
 
@@ -1087,7 +1087,7 @@ static struct inode *find_inode(struct super_block *sb,
  * iget_locked for details.
  */
 static struct inode *find_inode_fast(struct super_block *sb,
-				struct hlist_head *head, unsigned long ino,
+				struct hlist_head *head, u64 ino,
 				bool hash_locked, bool *isnew)
 {
 	struct inode *inode = NULL;
@@ -1301,7 +1301,7 @@ EXPORT_SYMBOL(unlock_two_nondirectories);
  * Note that both @test and @set are called with the inode_hash_lock held, so
  * they can't sleep.
  */
-struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
+struct inode *inode_insert5(struct inode *inode, u64 hashval,
 			    int (*test)(struct inode *, void *),
 			    int (*set)(struct inode *, void *), void *data)
 {
@@ -1378,7 +1378,7 @@ EXPORT_SYMBOL(inode_insert5);
  * Note that both @test and @set are called with the inode_hash_lock held, so
  * they can't sleep.
  */
-struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
+struct inode *iget5_locked(struct super_block *sb, u64 hashval,
 		int (*test)(struct inode *, void *),
 		int (*set)(struct inode *, void *), void *data)
 {
@@ -1408,7 +1408,7 @@ EXPORT_SYMBOL(iget5_locked);
  * This is equivalent to iget5_locked, except the @test callback must
  * tolerate the inode not being stable, including being mid-teardown.
  */
-struct inode *iget5_locked_rcu(struct super_block *sb, unsigned long hashval,
+struct inode *iget5_locked_rcu(struct super_block *sb, u64 hashval,
 		int (*test)(struct inode *, void *),
 		int (*set)(struct inode *, void *), void *data)
 {
@@ -1455,7 +1455,7 @@ EXPORT_SYMBOL_GPL(iget5_locked_rcu);
  * hashed, and with the I_NEW flag set.  The file system gets to fill it in
  * before unlocking it via unlock_new_inode().
  */
-struct inode *iget_locked(struct super_block *sb, unsigned long ino)
+struct inode *iget_locked(struct super_block *sb, u64 ino)
 {
 	struct hlist_head *head = inode_hashtable + hash(sb, ino);
 	struct inode *inode;
@@ -1527,7 +1527,7 @@ EXPORT_SYMBOL(iget_locked);
  *
  * Returns 1 if the inode number is unique, 0 if it is not.
  */
-static int test_inode_iunique(struct super_block *sb, unsigned long ino)
+static int test_inode_iunique(struct super_block *sb, u64 ino)
 {
 	struct hlist_head *b = inode_hashtable + hash(sb, ino);
 	struct inode *inode;
@@ -1616,7 +1616,7 @@ EXPORT_SYMBOL(igrab);
  *
  * Note2: @test is called with the inode_hash_lock held, so can't sleep.
  */
-struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
+struct inode *ilookup5_nowait(struct super_block *sb, u64 hashval,
 		int (*test)(struct inode *, void *), void *data, bool *isnew)
 {
 	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
@@ -1647,7 +1647,7 @@ EXPORT_SYMBOL(ilookup5_nowait);
  *
  * Note: @test is called with the inode_hash_lock held, so can't sleep.
  */
-struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
+struct inode *ilookup5(struct super_block *sb, u64 hashval,
 		int (*test)(struct inode *, void *), void *data)
 {
 	struct inode *inode;
@@ -1677,7 +1677,7 @@ EXPORT_SYMBOL(ilookup5);
  * Search for the inode @ino in the inode cache, and if the inode is in the
  * cache, the inode is returned with an incremented reference count.
  */
-struct inode *ilookup(struct super_block *sb, unsigned long ino)
+struct inode *ilookup(struct super_block *sb, u64 ino)
 {
 	struct hlist_head *head = inode_hashtable + hash(sb, ino);
 	struct inode *inode;
@@ -1726,8 +1726,8 @@ EXPORT_SYMBOL(ilookup);
  * very carefully implemented.
  */
 struct inode *find_inode_nowait(struct super_block *sb,
-				unsigned long hashval,
-				int (*match)(struct inode *, unsigned long,
+				u64 hashval,
+				int (*match)(struct inode *, u64,
 					     void *),
 				void *data)
 {
@@ -1773,7 +1773,7 @@ EXPORT_SYMBOL(find_inode_nowait);
  *
  * The caller must hold the RCU read lock.
  */
-struct inode *find_inode_rcu(struct super_block *sb, unsigned long hashval,
+struct inode *find_inode_rcu(struct super_block *sb, u64 hashval,
 			     int (*test)(struct inode *, void *), void *data)
 {
 	struct hlist_head *head = inode_hashtable + hash(sb, hashval);
@@ -1812,7 +1812,7 @@ EXPORT_SYMBOL(find_inode_rcu);
  * The caller must hold the RCU read lock.
  */
 struct inode *find_inode_by_ino_rcu(struct super_block *sb,
-				    unsigned long ino)
+				    u64 ino)
 {
 	struct hlist_head *head = inode_hashtable + hash(sb, ino);
 	struct inode *inode;
@@ -1833,7 +1833,7 @@ EXPORT_SYMBOL(find_inode_by_ino_rcu);
 int insert_inode_locked(struct inode *inode)
 {
 	struct super_block *sb = inode->i_sb;
-	ino_t ino = inode->i_ino;
+	u64 ino = inode->i_ino;
 	struct hlist_head *head = inode_hashtable + hash(sb, ino);
 	bool isnew;
 
@@ -1884,7 +1884,7 @@ int insert_inode_locked(struct inode *inode)
 }
 EXPORT_SYMBOL(insert_inode_locked);
 
-int insert_inode_locked4(struct inode *inode, unsigned long hashval,
+int insert_inode_locked4(struct inode *inode, u64 hashval,
 		int (*test)(struct inode *, void *), void *data)
 {
 	struct inode *old;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8b3dd145b25ec12b00ac1df17a952d9116b88047..dfa1f475b1c480c503ab6f00e891aa9b051607fa 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2935,32 +2935,32 @@ static inline int inode_generic_drop(struct inode *inode)
 extern void d_mark_dontcache(struct inode *inode);
 
 extern struct inode *ilookup5_nowait(struct super_block *sb,
-		unsigned long hashval, int (*test)(struct inode *, void *),
+		u64 hashval, int (*test)(struct inode *, void *),
 		void *data, bool *isnew);
-extern struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
+extern struct inode *ilookup5(struct super_block *sb, u64 hashval,
 		int (*test)(struct inode *, void *), void *data);
-extern struct inode *ilookup(struct super_block *sb, unsigned long ino);
+extern struct inode *ilookup(struct super_block *sb, u64 ino);
 
-extern struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
+extern struct inode *inode_insert5(struct inode *inode, u64 hashval,
 		int (*test)(struct inode *, void *),
 		int (*set)(struct inode *, void *),
 		void *data);
-struct inode *iget5_locked(struct super_block *, unsigned long,
+struct inode *iget5_locked(struct super_block *, u64,
 			   int (*test)(struct inode *, void *),
 			   int (*set)(struct inode *, void *), void *);
-struct inode *iget5_locked_rcu(struct super_block *, unsigned long,
+struct inode *iget5_locked_rcu(struct super_block *, u64,
 			       int (*test)(struct inode *, void *),
 			       int (*set)(struct inode *, void *), void *);
-extern struct inode * iget_locked(struct super_block *, unsigned long);
+extern struct inode *iget_locked(struct super_block *, u64);
 extern struct inode *find_inode_nowait(struct super_block *,
-				       unsigned long,
+				       u64,
 				       int (*match)(struct inode *,
-						    unsigned long, void *),
+						    u64, void *),
 				       void *data);
-extern struct inode *find_inode_rcu(struct super_block *, unsigned long,
+extern struct inode *find_inode_rcu(struct super_block *, u64,
 				    int (*)(struct inode *, void *), void *);
-extern struct inode *find_inode_by_ino_rcu(struct super_block *, unsigned long);
-extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *);
+extern struct inode *find_inode_by_ino_rcu(struct super_block *, u64);
+extern int insert_inode_locked4(struct inode *, u64, int (*test)(struct inode *, void *), void *);
 extern int insert_inode_locked(struct inode *);
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 extern void lockdep_annotate_inode_mutex_key(struct inode *inode);
@@ -3015,7 +3015,7 @@ int setattr_should_drop_sgid(struct mnt_idmap *idmap,
  */
 #define alloc_inode_sb(_sb, _cache, _gfp) kmem_cache_alloc_lru(_cache, &_sb->s_inode_lru, _gfp)
 
-extern void __insert_inode_hash(struct inode *, unsigned long hashval);
+extern void __insert_inode_hash(struct inode *, u64 hashval);
 static inline void insert_inode_hash(struct inode *inode)
 {
 	__insert_inode_hash(inode, inode->i_ino);

-- 
2.53.0


^ permalink raw reply related

* [PATCH v3 00/12] vfs: change inode->i_ino from unsigned long to u64
From: Jeff Layton @ 2026-03-04 15:32 UTC (permalink / raw)
  To: Alexander Viro, Christian Brauner, Jan Kara, Steven Rostedt,
	Masami Hiramatsu, Mathieu Desnoyers, Dan Williams, Eric Biggers,
	Theodore Y. Ts'o, Muchun Song, Oscar Salvador,
	David Hildenbrand, David Howells, Paulo Alcantara, Andreas Dilger,
	Jan Kara, Jaegeuk Kim, Chao Yu, Trond Myklebust, Anna Schumaker,
	Chuck Lever, NeilBrown, Olga Kornievskaia, Dai Ngo, Tom Talpey,
	Steve French, Ronnie Sahlberg, Shyam Prasad N, Bharath SM,
	Alexander Aring, Ryusuke Konishi, Viacheslav Dubeyko,
	Eric Van Hensbergen, Latchesar Ionkov, Dominique Martinet,
	Christian Schoenebeck, David Sterba, Marc Dionne, Ian Kent,
	Luis de Bethencourt, Salah Triki, Tigran A. Aivazian,
	Ilya Dryomov, Alex Markuze, Jan Harkes, coda, Nicolas Pitre,
	Tyler Hicks, Amir Goldstein, Christoph Hellwig,
	John Paul Adrian Glaubitz, Yangtao Li, Mikulas Patocka,
	David Woodhouse, Richard Weinberger, Dave Kleikamp,
	Konstantin Komarov, Mark Fasheh, Joel Becker, Joseph Qi,
	Mike Marshall, Martin Brandenburg, Miklos Szeredi, Anders Larsen,
	Zhihao Cheng, Damien Le Moal, Naohiro Aota, Johannes Thumshirn,
	John Johansen, Paul Moore, James Morris, Serge E. Hallyn,
	Mimi Zohar, Roberto Sassu, Dmitry Kasatkin, Eric Snowberg, Fan Wu,
	Stephen Smalley, Ondrej Mosnacek, Casey Schaufler, Alex Deucher,
	Christian König, David Airlie, Simona Vetter, Sumit Semwal,
	Eric Dumazet, Kuniyuki Iwashima, Paolo Abeni, Willem de Bruijn,
	David S. Miller, Jakub Kicinski, Simon Horman, Oleg Nesterov,
	Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Ian Rogers, Adrian Hunter, James Clark, Darrick J. Wong,
	Martin Schiller, Eric Paris, Joerg Reuter, Marcel Holtmann,
	Johan Hedberg, Luiz Augusto von Dentz, Oliver Hartkopp,
	Marc Kleine-Budde, David Ahern, Neal Cardwell, Steffen Klassert,
	Herbert Xu, Remi Denis-Courmont, Marcelo Ricardo Leitner,
	Xin Long, Magnus Karlsson, Maciej Fijalkowski, Stanislav Fomichev,
	Alexei Starovoitov, Daniel Borkmann, Jesper Dangaard Brouer,
	John Fastabend
  Cc: linux-fsdevel, linux-kernel, linux-trace-kernel, nvdimm, fsverity,
	linux-mm, netfs, linux-ext4, linux-f2fs-devel, linux-nfs,
	linux-cifs, samba-technical, linux-nilfs, v9fs, linux-afs, autofs,
	ceph-devel, codalist, ecryptfs, linux-mtd, jfs-discussion, ntfs3,
	ocfs2-devel, devel, linux-unionfs, apparmor,
	linux-security-module, linux-integrity, selinux, amd-gfx,
	dri-devel, linux-media, linaro-mm-sig, netdev, linux-perf-users,
	linux-fscrypt, linux-xfs, linux-hams, linux-x25, audit,
	linux-bluetooth, linux-can, linux-sctp, bpf, Jeff Layton

This version squashes all of the format-string changes and the i_ino
type change into the same patch. This results in a giant 600+ line patch
at the end of the series, but it does remain bisectable.  Because the
patchset was reorganized (again) some of the R-b's and A-b's have been
dropped.

The entire pile is in the "iino-u64" branch of my tree, if anyone is
interested in testing this.

    https://git.kernel.org/pub/scm/linux/kernel/git/jlayton/linux.git/

Original cover letter follows:

----------------------8<-----------------------

Christian said [1] to "just do it" when I proposed this, so here we are!

For historical reasons, the inode->i_ino field is an unsigned long,
which means that it's 32 bits on 32 bit architectures. This has caused a
number of filesystems to implement hacks to hash a 64-bit identifier
into a 32-bit field, and deprives us of a universal identifier field for
an inode.

This patchset changes the inode->i_ino field from an unsigned long to a
u64. This shouldn't make any material difference on 64-bit hosts, but
32-bit hosts will see struct inode grow by at least 4 bytes. This could
have effects on slabcache sizes and field alignment.

The bulk of the changes are to format strings and tracepoints, since the
kernel itself doesn't care that much about the i_ino field. The first
patch changes some vfs function arguments, so check that one out
carefully.

With this change, we may be able to shrink some inode structures. For
instance, struct nfs_inode has a fileid field that holds the 64-bit
inode number. With this set of changes, that field could be eliminated.
I'd rather leave that sort of cleanup for later just to keep this
simple.

Much of this set was generated by an LLM, but I attributed it to myself
since I consider this to be in the "menial tasks" category of LLM usage.

[1]: https://lore.kernel.org/linux-fsdevel/20260219-portrait-winkt-959070cee42f@brauner/

Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
Changes in v3:
- reorganize set for fewer patches, drop kino_t typedef and PRIino macro
- reorganize more TP_struct fields for better packing
- clean up ext4 goal calculation in ext4_ext_migrate()
- make audit_inode_hash() take a 64-bit argument
- Link to v2: https://lore.kernel.org/r/20260302-iino-u64-v2-0-e5388800dae0@kernel.org

Changes in v2:
- Use a typedef and macro and do the change in two steps to make it cleanly bisectable
- Fix check_for_busy_inodes() in fscrypt
- Added patch to reorganize tracepoint structs for better packing
- Added patch to change sock.sk_ino to u64
- Added patch to clean up internal handling of inode numbers in audit subsystem
- Drop some unnecessary casts
- Link to v1: https://lore.kernel.org/r/20260226-iino-u64-v1-0-ccceff366db9@kernel.org

---
Jeff Layton (12):
      vfs: widen inode hash/lookup functions to u64
      audit: widen ino fields to u64
      net: change sock.sk_ino and sock_i_ino() to u64
      vfs: widen trace event i_ino fields to u64
      cachefiles: widen trace event i_ino fields to u64
      ext2: widen trace event i_ino fields to u64
      hugetlbfs: widen trace event i_ino fields to u64
      zonefs: widen trace event i_ino fields to u64
      ext4: widen trace event i_ino fields to u64
      f2fs: widen trace event i_ino fields to u64
      nilfs2: widen trace event i_ino fields to u64
      treewide: change inode->i_ino from unsigned long to u64

 drivers/dma-buf/dma-buf.c                  |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |   4 +-
 fs/9p/vfs_addr.c                           |   4 +-
 fs/9p/vfs_inode.c                          |   6 +-
 fs/9p/vfs_inode_dotl.c                     |   6 +-
 fs/affs/amigaffs.c                         |  10 +-
 fs/affs/bitmap.c                           |   2 +-
 fs/affs/dir.c                              |   2 +-
 fs/affs/file.c                             |  20 +-
 fs/affs/inode.c                            |  12 +-
 fs/affs/namei.c                            |  14 +-
 fs/affs/symlink.c                          |   2 +-
 fs/afs/dir.c                               |  10 +-
 fs/afs/dir_search.c                        |   2 +-
 fs/afs/dynroot.c                           |   2 +-
 fs/afs/inode.c                             |   2 +-
 fs/autofs/inode.c                          |   2 +-
 fs/befs/linuxvfs.c                         |  28 +-
 fs/bfs/dir.c                               |   4 +-
 fs/cachefiles/io.c                         |   6 +-
 fs/cachefiles/namei.c                      |  12 +-
 fs/cachefiles/xattr.c                      |   2 +-
 fs/ceph/crypto.c                           |   4 +-
 fs/coda/dir.c                              |   2 +-
 fs/coda/inode.c                            |   2 +-
 fs/cramfs/inode.c                          |   2 +-
 fs/crypto/crypto.c                         |   2 +-
 fs/crypto/hooks.c                          |   2 +-
 fs/crypto/keyring.c                        |   4 +-
 fs/crypto/keysetup.c                       |   2 +-
 fs/dcache.c                                |   4 +-
 fs/ecryptfs/crypto.c                       |   6 +-
 fs/ecryptfs/file.c                         |   2 +-
 fs/efs/inode.c                             |   6 +-
 fs/eventpoll.c                             |   2 +-
 fs/exportfs/expfs.c                        |   4 +-
 fs/ext2/dir.c                              |  10 +-
 fs/ext2/ialloc.c                           |   9 +-
 fs/ext2/inode.c                            |   2 +-
 fs/ext2/trace.h                            |   8 +-
 fs/ext2/xattr.c                            |  14 +-
 fs/ext4/dir.c                              |   2 +-
 fs/ext4/ext4.h                             |   4 +-
 fs/ext4/extents.c                          |   8 +-
 fs/ext4/extents_status.c                   |  28 +-
 fs/ext4/fast_commit.c                      |   8 +-
 fs/ext4/ialloc.c                           |  10 +-
 fs/ext4/indirect.c                         |   2 +-
 fs/ext4/inline.c                           |  14 +-
 fs/ext4/inode.c                            |  22 +-
 fs/ext4/ioctl.c                            |   4 +-
 fs/ext4/mballoc.c                          |   6 +-
 fs/ext4/migrate.c                          |   2 +-
 fs/ext4/move_extent.c                      |  20 +-
 fs/ext4/namei.c                            |  10 +-
 fs/ext4/orphan.c                           |  16 +-
 fs/ext4/page-io.c                          |  10 +-
 fs/ext4/super.c                            |  22 +-
 fs/ext4/xattr.c                            |  10 +-
 fs/f2fs/compress.c                         |   4 +-
 fs/f2fs/dir.c                              |   2 +-
 fs/f2fs/extent_cache.c                     |   8 +-
 fs/f2fs/f2fs.h                             |   6 +-
 fs/f2fs/file.c                             |  12 +-
 fs/f2fs/gc.c                               |   2 +-
 fs/f2fs/inline.c                           |   4 +-
 fs/f2fs/inode.c                            |  48 +--
 fs/f2fs/namei.c                            |   8 +-
 fs/f2fs/node.c                             |  12 +-
 fs/f2fs/recovery.c                         |  10 +-
 fs/f2fs/xattr.c                            |  10 +-
 fs/freevxfs/vxfs_bmap.c                    |   4 +-
 fs/fserror.c                               |   2 +-
 fs/hfs/catalog.c                           |   2 +-
 fs/hfs/extent.c                            |   4 +-
 fs/hfs/inode.c                             |   4 +-
 fs/hfsplus/attributes.c                    |  10 +-
 fs/hfsplus/catalog.c                       |   2 +-
 fs/hfsplus/dir.c                           |   6 +-
 fs/hfsplus/extents.c                       |   6 +-
 fs/hfsplus/inode.c                         |   8 +-
 fs/hfsplus/super.c                         |   6 +-
 fs/hfsplus/xattr.c                         |  10 +-
 fs/hpfs/dir.c                              |   4 +-
 fs/hpfs/dnode.c                            |   4 +-
 fs/hpfs/ea.c                               |   4 +-
 fs/hpfs/inode.c                            |   4 +-
 fs/inode.c                                 |  49 ++-
 fs/iomap/ioend.c                           |   2 +-
 fs/iomap/trace.h                           |   8 +-
 fs/isofs/compress.c                        |   2 +-
 fs/isofs/dir.c                             |   2 +-
 fs/isofs/inode.c                           |   6 +-
 fs/isofs/namei.c                           |   2 +-
 fs/jbd2/journal.c                          |   4 +-
 fs/jbd2/transaction.c                      |   2 +-
 fs/jffs2/dir.c                             |   4 +-
 fs/jffs2/file.c                            |   4 +-
 fs/jffs2/fs.c                              |  18 +-
 fs/jfs/inode.c                             |   2 +-
 fs/jfs/jfs_imap.c                          |   2 +-
 fs/jfs/jfs_metapage.c                      |   2 +-
 fs/lockd/svclock.c                         |   8 +-
 fs/lockd/svcsubs.c                         |   2 +-
 fs/locks.c                                 |   6 +-
 fs/minix/inode.c                           |  10 +-
 fs/nfs/dir.c                               |  20 +-
 fs/nfs/file.c                              |   8 +-
 fs/nfs/filelayout/filelayout.c             |   8 +-
 fs/nfs/flexfilelayout/flexfilelayout.c     |   8 +-
 fs/nfs/inode.c                             |   6 +-
 fs/nfs/nfs4proc.c                          |   4 +-
 fs/nfs/pnfs.c                              |  12 +-
 fs/nfsd/export.c                           |   2 +-
 fs/nfsd/nfs4state.c                        |   4 +-
 fs/nfsd/nfsfh.c                            |   4 +-
 fs/nfsd/vfs.c                              |   2 +-
 fs/nilfs2/alloc.c                          |  10 +-
 fs/nilfs2/bmap.c                           |   2 +-
 fs/nilfs2/btnode.c                         |   2 +-
 fs/nilfs2/btree.c                          |  12 +-
 fs/nilfs2/dir.c                            |  12 +-
 fs/nilfs2/direct.c                         |   4 +-
 fs/nilfs2/gcinode.c                        |   2 +-
 fs/nilfs2/inode.c                          |   8 +-
 fs/nilfs2/mdt.c                            |   2 +-
 fs/nilfs2/namei.c                          |   2 +-
 fs/nilfs2/segment.c                        |   2 +-
 fs/notify/fdinfo.c                         |   4 +-
 fs/nsfs.c                                  |   4 +-
 fs/ntfs3/super.c                           |   2 +-
 fs/ocfs2/alloc.c                           |   2 +-
 fs/ocfs2/aops.c                            |   4 +-
 fs/ocfs2/dir.c                             |   8 +-
 fs/ocfs2/dlmfs/dlmfs.c                     |  10 +-
 fs/ocfs2/extent_map.c                      |  12 +-
 fs/ocfs2/inode.c                           |   2 +-
 fs/ocfs2/quota_local.c                     |   2 +-
 fs/ocfs2/refcounttree.c                    |  10 +-
 fs/ocfs2/xattr.c                           |   4 +-
 fs/orangefs/inode.c                        |   2 +-
 fs/overlayfs/export.c                      |   2 +-
 fs/overlayfs/namei.c                       |   4 +-
 fs/overlayfs/util.c                        |   2 +-
 fs/pipe.c                                  |   2 +-
 fs/proc/fd.c                               |   2 +-
 fs/proc/task_mmu.c                         |   4 +-
 fs/qnx4/inode.c                            |   4 +-
 fs/qnx6/inode.c                            |   2 +-
 fs/ubifs/debug.c                           |   8 +-
 fs/ubifs/dir.c                             |  28 +-
 fs/ubifs/file.c                            |  28 +-
 fs/ubifs/journal.c                         |   6 +-
 fs/ubifs/super.c                           |  16 +-
 fs/ubifs/tnc.c                             |   4 +-
 fs/ubifs/xattr.c                           |  14 +-
 fs/udf/directory.c                         |  18 +-
 fs/udf/file.c                              |   2 +-
 fs/udf/inode.c                             |  12 +-
 fs/udf/namei.c                             |   8 +-
 fs/udf/super.c                             |   2 +-
 fs/ufs/balloc.c                            |   6 +-
 fs/ufs/dir.c                               |  10 +-
 fs/ufs/ialloc.c                            |   6 +-
 fs/ufs/inode.c                             |  18 +-
 fs/ufs/ufs_fs.h                            |   6 +-
 fs/ufs/util.c                              |   2 +-
 fs/verity/init.c                           |   2 +-
 fs/zonefs/super.c                          |   8 +-
 fs/zonefs/trace.h                          |  18 +-
 include/linux/audit.h                      |   2 +-
 include/linux/fs.h                         |  28 +-
 include/net/sock.h                         |   4 +-
 include/trace/events/cachefiles.h          |  18 +-
 include/trace/events/ext4.h                | 544 ++++++++++++++---------------
 include/trace/events/f2fs.h                | 242 ++++++-------
 include/trace/events/filelock.h            |  34 +-
 include/trace/events/filemap.h             |  20 +-
 include/trace/events/fs_dax.h              |  20 +-
 include/trace/events/fsverity.h            |  30 +-
 include/trace/events/hugetlbfs.h           |  42 +--
 include/trace/events/netfs.h               |   8 +-
 include/trace/events/nilfs2.h              |  12 +-
 include/trace/events/readahead.h           |  18 +-
 include/trace/events/timestamp.h           |  16 +-
 include/trace/events/writeback.h           | 162 ++++-----
 kernel/audit.h                             |  13 +-
 kernel/audit_fsnotify.c                    |   4 +-
 kernel/audit_watch.c                       |  12 +-
 kernel/auditsc.c                           |   4 +-
 kernel/events/uprobes.c                    |   4 +-
 net/ax25/af_ax25.c                         |   2 +-
 net/bluetooth/af_bluetooth.c               |   4 +-
 net/can/bcm.c                              |   2 +-
 net/ipv4/ping.c                            |   2 +-
 net/ipv4/raw.c                             |   2 +-
 net/ipv4/tcp_ipv4.c                        |   2 +-
 net/ipv4/udp.c                             |   2 +-
 net/ipv6/datagram.c                        |   2 +-
 net/ipv6/tcp_ipv6.c                        |   2 +-
 net/key/af_key.c                           |   2 +-
 net/netlink/af_netlink.c                   |   2 +-
 net/netlink/diag.c                         |   2 +-
 net/netrom/af_netrom.c                     |   4 +-
 net/packet/af_packet.c                     |   2 +-
 net/packet/diag.c                          |   2 +-
 net/phonet/socket.c                        |   4 +-
 net/rose/af_rose.c                         |   4 +-
 net/sctp/proc.c                            |   4 +-
 net/socket.c                               |   2 +-
 net/unix/af_unix.c                         |   2 +-
 net/unix/diag.c                            |   6 +-
 net/x25/x25_proc.c                         |   4 +-
 net/xdp/xsk_diag.c                         |   2 +-
 security/apparmor/apparmorfs.c             |   4 +-
 security/integrity/integrity_audit.c       |   2 +-
 security/ipe/audit.c                       |   2 +-
 security/lsm_audit.c                       |  10 +-
 security/selinux/hooks.c                   |  10 +-
 security/smack/smack_lsm.c                 |  12 +-
 220 files changed, 1282 insertions(+), 1283 deletions(-)
---
base-commit: 842cfe0733c5a03982a7ae496de6fdc0dd661a41
change-id: 20260224-iino-u64-b44a3a72543c

Best regards,
-- 
Jeff Layton <jlayton@kernel.org>


^ permalink raw reply

* Re: [PATCH v4 4/5] mm: rename zone->lock to zone->_lock
From: SeongJae Park @ 2026-03-04 15:17 UTC (permalink / raw)
  To: SeongJae Park
  Cc: Dmitry Ilvokhin, Andrew Morton, David Hildenbrand,
	Lorenzo Stoakes, Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Michal Hocko, Axel Rasmussen, Yuanchu Xie,
	Wei Xu, Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
	Rafael J. Wysocki, Pavel Machek, Len Brown, Brendan Jackman,
	Johannes Weiner, Zi Yan, Oscar Salvador, Qi Zheng, Shakeel Butt,
	linux-kernel, linux-mm, linux-trace-kernel, linux-pm
In-Reply-To: <20260304151335.172572-1-sj@kernel.org>

On Wed,  4 Mar 2026 07:13:34 -0800 SeongJae Park <sj@kernel.org> wrote:

> On Wed, 4 Mar 2026 13:01:45 +0000 Dmitry Ilvokhin <d@ilvokhin.com> wrote:
> 
> > On Tue, Mar 03, 2026 at 05:50:34PM -0800, SeongJae Park wrote:
> > > On Tue, 3 Mar 2026 14:25:55 +0000 Dmitry Ilvokhin <d@ilvokhin.com> wrote:
> > > 
> > > > On Mon, Mar 02, 2026 at 02:37:43PM -0800, Andrew Morton wrote:
> > > > > On Mon, 2 Mar 2026 15:10:03 +0100 "Vlastimil Babka (SUSE)" <vbabka@kernel.org> wrote:
> > > > > 
> > > > > > On 2/27/26 17:00, Dmitry Ilvokhin wrote:
> > > > > > > This intentionally breaks direct users of zone->lock at compile time so
> > > > > > > all call sites are converted to the zone lock wrappers. Without the
> > > > > > > rename, present and future out-of-tree code could continue using
> > > > > > > spin_lock(&zone->lock) and bypass the wrappers and tracing
> > > > > > > infrastructure.
> > > > > > > 
> > > > > > > No functional change intended.
> > > > > > > 
> > > > > > > Suggested-by: Andrew Morton <akpm@linux-foundation.org>
> > > > > > > Signed-off-by: Dmitry Ilvokhin <d@ilvokhin.com>
> > > > > > > Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
> > > > > > > Acked-by: SeongJae Park <sj@kernel.org>
> > > > > > 
> > > > > > I see some more instances of 'zone->lock' in comments in
> > > > > > include/linux/mmzone.h and under Documentation/ but otherwise LGTM.
> > > > > > 
> > > > > 
> > > > > I fixed (most of) that in the previous version but my fix was lost.
> > > > 
> > > > Thanks for the fixups, Andrew.
> > > > 
> > > > I still see a few 'zone->lock' references in Documentation remain on
> > > > mm-new. This patch cleans them up, as noted by Vlastimil.
> > > > 
> > > > I'm happy to adjust this patch if anything else needs attention.
> > > > 
> > > > From 9142d5a8b60038fa424a6033253960682e5a51f4 Mon Sep 17 00:00:00 2001
> > > > From: Dmitry Ilvokhin <d@ilvokhin.com>
> > > > Date: Tue, 3 Mar 2026 06:13:13 -0800
> > > > Subject: [PATCH] mm: fix remaining zone->lock references
> > > > 
> > > > Signed-off-by: Dmitry Ilvokhin <d@ilvokhin.com>
> > > > ---
> > > >  Documentation/mm/physical_memory.rst | 4 ++--
> > > >  Documentation/trace/events-kmem.rst  | 8 ++++----
> > > >  2 files changed, 6 insertions(+), 6 deletions(-)
> > > > 
> > > > diff --git a/Documentation/mm/physical_memory.rst b/Documentation/mm/physical_memory.rst
> > > > index b76183545e5b..e344f93515b6 100644
> > > > --- a/Documentation/mm/physical_memory.rst
> > > > +++ b/Documentation/mm/physical_memory.rst
> > > > @@ -500,11 +500,11 @@ General
> > > >  ``nr_isolate_pageblock``
> > > >    Number of isolated pageblocks. It is used to solve incorrect freepage counting
> > > >    problem due to racy retrieving migratetype of pageblock. Protected by
> > > > -  ``zone->lock``. Defined only when ``CONFIG_MEMORY_ISOLATION`` is enabled.
> > > > +  ``zone_lock``. Defined only when ``CONFIG_MEMORY_ISOLATION`` is enabled.
> > > 
> > > Dmitry's original patch [1] was doing 's/zone->lock/zone->_lock/', which aligns
> > > to my expectation.  But this patch is doing 's/zone->lock/zone_lock/'.  Same
> > > for the rest of this patch.
> > > 
> > > I was initially thinking this is just a mistake, but I also found Andrew is
> > > doing the same change [2], so I'm a bit confused.  Is this an intentional change?
> > > 
> > > [1] https://lore.kernel.org/d61500c5784c64e971f4d328c57639303c475f81.1772206930.git.d@ilvokhin.com
> > > [2] https://lore.kernel.org/20260302143743.220eed4feb36d7572fe726cc@linux-foundation.org
> > > 
> > 
> > Good catch, thanks for pointing this out, SJ.
> > 
> > Originally the mechanical rename was indeed zone->lock -> zone->_lock.
> > However, in Documentation I intentionally switched references to
> > zone_lock instead of zone->_lock. The reasoning is that _lock is now an
> > internal implementation detail, and direct access is discouraged. The
> > intended interface is via the zone_lock_*() / zone_unlock_*() wrappers,
> > so referencing zone_lock in documentation felt more appropriate than
> > mentioning the private struct field (zone->_lock).
> 
> Thank you for this nice and kind clarification, Dmitry!  I agree mentioning
> zone_[un]lock_*() helpers instead of the hidden member (zone->_lock) can be
> better.
> 
> But I'm concerned that people like me might not be aware of the intention behind
> 'zone_lock'.  If there is a well-known convention that allows people to know it
> is for 'zone_[un]lock_*()' helpers, making it more clear would be nice, in my
> humble opinion.  If there is such a convention but I'm just missing it, please
> ignore.  If I'm not, for example,
> 
> "protected by ``zone->lock``" could be rewritten as
> "protected by ``zone_[un]lock_*()`` locking helpers" or,
> "protected by zone lock helper functions (``zone_[un]lock_*()``)" ?

Maybe too verbose, and people who are not used to regex might be confused.
Mentioning mmzone_lock.h might be better?  E.g.,

    protected by functions in mmzone_lock.h

> 
> > 
> > That said, I agree this creates inconsistency with the mechanical
> > rename, and I'm happy to adjust either way: either consistently refer
> > to the wrapper API, or keep documentation aligned with zone->_lock.
> > 
> > I slightly prefer referring to the wrapper API, but don't have a strong
> > preference as long as we're consistent.
> 
> I also think both approaches are good.  But for the wrapper approach, I think
> giving more contexts rather than just ``zone_lock`` to readers would be nice.


Thanks,
SJ

[...]

^ permalink raw reply

* Re: [PATCH v4 4/5] mm: rename zone->lock to zone->_lock
From: SeongJae Park @ 2026-03-04 15:13 UTC (permalink / raw)
  To: Dmitry Ilvokhin
  Cc: SeongJae Park, Andrew Morton, David Hildenbrand, Lorenzo Stoakes,
	Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Michal Hocko, Axel Rasmussen, Yuanchu Xie,
	Wei Xu, Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
	Rafael J. Wysocki, Pavel Machek, Len Brown, Brendan Jackman,
	Johannes Weiner, Zi Yan, Oscar Salvador, Qi Zheng, Shakeel Butt,
	linux-kernel, linux-mm, linux-trace-kernel, linux-pm
In-Reply-To: <aagtOctLQqpDcw_h@shell.ilvokhin.com>

On Wed, 4 Mar 2026 13:01:45 +0000 Dmitry Ilvokhin <d@ilvokhin.com> wrote:

> On Tue, Mar 03, 2026 at 05:50:34PM -0800, SeongJae Park wrote:
> > On Tue, 3 Mar 2026 14:25:55 +0000 Dmitry Ilvokhin <d@ilvokhin.com> wrote:
> > 
> > > On Mon, Mar 02, 2026 at 02:37:43PM -0800, Andrew Morton wrote:
> > > > On Mon, 2 Mar 2026 15:10:03 +0100 "Vlastimil Babka (SUSE)" <vbabka@kernel.org> wrote:
> > > > 
> > > > > On 2/27/26 17:00, Dmitry Ilvokhin wrote:
> > > > > > This intentionally breaks direct users of zone->lock at compile time so
> > > > > > all call sites are converted to the zone lock wrappers. Without the
> > > > > > rename, present and future out-of-tree code could continue using
> > > > > > spin_lock(&zone->lock) and bypass the wrappers and tracing
> > > > > > infrastructure.
> > > > > > 
> > > > > > No functional change intended.
> > > > > > 
> > > > > > Suggested-by: Andrew Morton <akpm@linux-foundation.org>
> > > > > > Signed-off-by: Dmitry Ilvokhin <d@ilvokhin.com>
> > > > > > Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
> > > > > > Acked-by: SeongJae Park <sj@kernel.org>
> > > > > 
> > > > > I see some more instances of 'zone->lock' in comments in
> > > > > include/linux/mmzone.h and under Documentation/ but otherwise LGTM.
> > > > > 
> > > > 
> > > > I fixed (most of) that in the previous version but my fix was lost.
> > > 
> > > Thanks for the fixups, Andrew.
> > > 
> > > I still see a few 'zone->lock' references in Documentation remain on
> > > mm-new. This patch cleans them up, as noted by Vlastimil.
> > > 
> > > I'm happy to adjust this patch if anything else needs attention.
> > > 
> > > From 9142d5a8b60038fa424a6033253960682e5a51f4 Mon Sep 17 00:00:00 2001
> > > From: Dmitry Ilvokhin <d@ilvokhin.com>
> > > Date: Tue, 3 Mar 2026 06:13:13 -0800
> > > Subject: [PATCH] mm: fix remaining zone->lock references
> > > 
> > > Signed-off-by: Dmitry Ilvokhin <d@ilvokhin.com>
> > > ---
> > >  Documentation/mm/physical_memory.rst | 4 ++--
> > >  Documentation/trace/events-kmem.rst  | 8 ++++----
> > >  2 files changed, 6 insertions(+), 6 deletions(-)
> > > 
> > > diff --git a/Documentation/mm/physical_memory.rst b/Documentation/mm/physical_memory.rst
> > > index b76183545e5b..e344f93515b6 100644
> > > --- a/Documentation/mm/physical_memory.rst
> > > +++ b/Documentation/mm/physical_memory.rst
> > > @@ -500,11 +500,11 @@ General
> > >  ``nr_isolate_pageblock``
> > >    Number of isolated pageblocks. It is used to solve incorrect freepage counting
> > >    problem due to racy retrieving migratetype of pageblock. Protected by
> > > -  ``zone->lock``. Defined only when ``CONFIG_MEMORY_ISOLATION`` is enabled.
> > > +  ``zone_lock``. Defined only when ``CONFIG_MEMORY_ISOLATION`` is enabled.
> > 
> > Dmitry's original patch [1] was doing 's/zone->lock/zone->_lock/', which aligns
> > to my expectation.  But this patch is doing 's/zone->lock/zone_lock/'.  Same
> > for the rest of this patch.
> > 
> > I was initially thinking this is just a mistake, but I also found Andrew is
> > doing the same change [2], so I'm a bit confused.  Is this an intentional change?
> > 
> > [1] https://lore.kernel.org/d61500c5784c64e971f4d328c57639303c475f81.1772206930.git.d@ilvokhin.com
> > [2] https://lore.kernel.org/20260302143743.220eed4feb36d7572fe726cc@linux-foundation.org
> > 
> 
> Good catch, thanks for pointing this out, SJ.
> 
> Originally the mechanical rename was indeed zone->lock -> zone->_lock.
> However, in Documentation I intentionally switched references to
> zone_lock instead of zone->_lock. The reasoning is that _lock is now an
> internal implementation detail, and direct access is discouraged. The
> intended interface is via the zone_lock_*() / zone_unlock_*() wrappers,
> so referencing zone_lock in documentation felt more appropriate than
> mentioning the private struct field (zone->_lock).

Thank you for this nice and kind clarification, Dmitry!  I agree mentioning
zone_[un]lock_*() helpers instead of the hidden member (zone->_lock) can be
better.

But I'm concerned that people like me might not be aware of the intention behind
'zone_lock'.  If there is a well-known convention that allows people to know it
is for 'zone_[un]lock_*()' helpers, making it more clear would be nice, in my
humble opinion.  If there is such a convention but I'm just missing it, please
ignore.  If I'm not, for example,

"protected by ``zone->lock``" could be rewritten as
"protected by ``zone_[un]lock_*()`` locking helpers" or,
"protected by zone lock helper functions (``zone_[un]lock_*()``)" ?

> 
> That said, I agree this creates inconsistency with the mechanical
> rename, and I'm happy to adjust either way: either consistently refer
> to the wrapper API, or keep documentation aligned with zone->_lock.
> 
> I slightly prefer referring to the wrapper API, but don't have a strong
> preference as long as we're consistent.

I also think both approaches are good.  But for the wrapper approach, I think
giving more contexts rather than just ``zone_lock`` to readers would be nice.


Thanks,
SJ

[...]

^ permalink raw reply

* Re: [PATCH v3 1/5] tools/rtla: Consolidate nr_cpus usage across all tools
From: Tomas Glozar @ 2026-03-04 14:59 UTC (permalink / raw)
  To: Costa Shulyupin
  Cc: Steven Rostedt, Crystal Wood, Wander Lairson Costa, Ivan Pravdin,
	John Kacur, Tiezhu Yang, linux-trace-kernel, linux-kernel, bpf
In-Reply-To: <20260213115234.430232-2-costa.shul@redhat.com>

pá 13. 2. 2026 v 12:53 odesílatel Costa Shulyupin
<costa.shul@redhat.com> napsal:
> --- a/tools/tracing/rtla/src/common.c
> +++ b/tools/tracing/rtla/src/common.c
> @@ -5,12 +5,14 @@
>  #include <signal.h>
>  #include <stdlib.h>
>  #include <string.h>
> -#include <unistd.h>
>  #include <getopt.h>
> +#include <sys/sysinfo.h>
> +
>  #include "common.h"
>
>  struct trace_instance *trace_inst;
>  volatile int stop_tracing;
> +int nr_cpus;
>

This should be updated on top of the unit tests patchset [1] to also
include and initialize this global variable in tests, right? Without
that, unit tests fail to compile with undefined reference.

[1] https://lore.kernel.org/linux-trace-kernel/20260119105857.797498-1-costa.shul@redhat.com/

Tomas


^ permalink raw reply

* Re: [PATCH v3 5/5] tools/rtla: Remove unneeded cpus parameter from timerlat BPF functions
From: Tomas Glozar @ 2026-03-04 14:56 UTC (permalink / raw)
  To: Costa Shulyupin
  Cc: Steven Rostedt, Crystal Wood, Wander Lairson Costa, Ivan Pravdin,
	John Kacur, Tiezhu Yang, linux-trace-kernel, linux-kernel, bpf
In-Reply-To: <20260213115234.430232-6-costa.shul@redhat.com>

pá 13. 2. 2026 v 12:53 odesílatel Costa Shulyupin
<costa.shul@redhat.com> napsal:
>
> nr_cpus is a global variable available throughout the codebase, so
> passing it as a function parameter is unnecessary.
>
> Remove the cpus parameter from timerlat_bpf_get_hist_value() and
> get_value(), using the global nr_cpus directly.
>
> Signed-off-by: Costa Shulyupin <costa.shul@redhat.com>
> ---
>  tools/tracing/rtla/src/timerlat_bpf.c  | 16 +++++++---------
>  tools/tracing/rtla/src/timerlat_bpf.h  |  6 ++----
>  tools/tracing/rtla/src/timerlat_hist.c |  2 +-
>  3 files changed, 10 insertions(+), 14 deletions(-)
>

Is there a reason to have a separate commit for this? The change fits
into the "tools/rtla: Remove unneeded nr_cpus arguments" commit, which also
touches some of the functions in timerlat_bpf.c. It feels strange to
have some timerlat BPF functions cleaned up in that commit and some in
this one.

Tomas


^ permalink raw reply

* Re: [RFC PATCH 1/2] locking: add mutex_lock_nospin()
From: Yafang Shao @ 2026-03-04 14:25 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: mingo, will, boqun, longman, rostedt, mhiramat, mark.rutland,
	mathieu.desnoyers, linux-kernel, linux-trace-kernel, bpf
In-Reply-To: <20260304124148.GA2277644@noisy.programming.kicks-ass.net>

On Wed, Mar 4, 2026 at 8:41 PM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Wed, Mar 04, 2026 at 07:52:06PM +0800, Yafang Shao wrote:
> > On Wed, Mar 4, 2026 at 6:11 PM Peter Zijlstra <peterz@infradead.org> wrote:
> > >
> > > On Wed, Mar 04, 2026 at 05:37:31PM +0800, Yafang Shao wrote:
> > > > On Wed, Mar 4, 2026 at 5:03 PM Peter Zijlstra <peterz@infradead.org> wrote:
> > > > >
> > > > > On Wed, Mar 04, 2026 at 03:46:49PM +0800, Yafang Shao wrote:
> > > > > > Introduce mutex_lock_nospin(), a helper that disables optimistic spinning
> > > > > > on the owner for specific heavy locks. This prevents long spinning times
> > > > > > that can lead to latency spikes for other tasks on the same runqueue.
> > > > >
> > > > > This makes no sense; spinning stops on need_resched().
> > > >
> > > > Hello Peter,
> > > >
> > > > The condition to stop spinning on need_resched() relies on the mutex
> > > > owner remaining unchanged. However, when multiple tasks contend for
> > > > the same lock, the owner can change frequently. This creates a
> > > > potential TOCTOU (Time of Check to Time of Use) issue.
> > > >
> > > >   mutex_optimistic_spin
> > > >       owner = __mutex_trylock_or_owner(lock);
> > > >       mutex_spin_on_owner
> > > >           // the __mutex_owner(lock) might get a new owner.
> > > >           while (__mutex_owner(lock) == owner)
> > > >
> > >
> > > How do these new owners become the owner? Are they succeeding the
> > > __mutex_trylock() that sits before mutex_optimistic_spin() and
> > > effectively starving the spinner?
> > >
> > > Something like the below would make a difference if that were so.
> >
> > The following change made no difference; concurrent runs still result
> > in prolonged system time.
> >
> > real 0m5.265s user 0m0.000s sys 0m4.921s
> > real 0m5.295s user 0m0.002s sys 0m4.697s
> > real 0m5.293s user 0m0.003s sys 0m4.844s
> > real 0m5.303s user 0m0.001s sys 0m4.511s
> > real 0m5.303s user 0m0.000s sys 0m4.694s
> > real 0m5.302s user 0m0.002s sys 0m4.677s
> > real 0m5.313s user 0m0.000s sys 0m4.837s
> > real 0m5.327s user 0m0.000s sys 0m4.808s
> > real 0m5.330s user 0m0.001s sys 0m4.893s
> > real 0m5.358s user 0m0.005s sys 0m4.919s
> >
> > Our kernel is not built with CONFIG_PREEMPT enabled, so prolonged
> > system time can lead to CPU pressure and potential latency spikes.
> > Since we can reliably reproduce this unnecessary spinning, why not
> > improve it to reduce the overhead?
>
> If you cannot explain what the problem is (you haven't), there is
> nothing to fix.
>
> Also, current kernels cannot be built without PREEMPT; and if you care
> about latency running a PREEMPT=n kernel is daft. That said,
> TIF_NEED_RESCHED should work irrespective of PREEMPT settings, the
> PREEMPT settings just affect when and how you end up in schedule().
>
> Even without PREEMPT, if there is a task waiting either the wakeup or
> the tick will set TIF_NEED_RESCHED and it should stop spinning. If there
> is no task waiting, there is no actual latency, just burning CPU time,
> and that isn't a problem per-se.
>
> What should happen is that the first spinner gets the lock next, the
> next spinner is then promoted to first spinner and so on.
>
> This chain continues, which means the lock owner is always
> on-cpu and good progress is being made and there is no CPU contention,
> or the spinner gets marked for preemption (as said, this does not
> require PREEMPT=y) and will stop spinning and go sleep, or the owner
> goes to sleep and all the spinners stop and also go sleep.
>
> Again, you have not said anything specific enough to figure out what
> happens on your end. You said the owner changes, this means there is
> progress made. What isn't clear is if any one particular spinner is
> starved (that would be a problem)

As far as I can tell, no spinner is starved. The spinner and the
impacted task are interleaved, which is expected behavior.

> or if this latency spike you observe
> is worse than would be from running a while(1) loop, in which case,
> that's just how it is.

The latency spike occurs because the impacted task must wait for the
spinner to voluntarily yield the CPU. In effect, the spinner behaves
similarly to a while (1) {} loop.

So the real problem here is that we should avoid unnecessary spinning.
Is there any reason we have to spin to speed up the ftrace_lock? I
believe not.

>
> What is not sane, is marking random locks with random properties just
> because random workload.



-- 
Regards
Yafang

^ permalink raw reply

* Re: [PATCH v2 001/110] vfs: introduce kino_t typedef and PRIino format macro
From: Christian Brauner @ 2026-03-04 14:10 UTC (permalink / raw)
  To: Jeff Layton
  Cc: Christoph Hellwig, Darrick J. Wong, Theodore Tso, Alexander Viro,
	Jan Kara, Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
	Dan Williams, Matthew Wilcox, Eric Biggers, Muchun Song,
	Oscar Salvador, David Hildenbrand, David Howells, Paulo Alcantara,
	Andreas Dilger, Jan Kara, Jaegeuk Kim, Chao Yu, Trond Myklebust,
	Anna Schumaker, Chuck Lever, NeilBrown, Olga Kornievskaia,
	Dai Ngo, Tom Talpey, Steve French, Ronnie Sahlberg,
	Shyam Prasad N, Bharath SM, Alexander Aring, Ryusuke Konishi,
	Viacheslav Dubeyko, Eric Van Hensbergen, Latchesar Ionkov,
	Dominique Martinet, Christian Schoenebeck, David Sterba,
	Marc Dionne, Ian Kent, Luis de Bethencourt, Salah Triki,
	Tigran A. Aivazian, Ilya Dryomov, Alex Markuze, Jan Harkes, coda,
	Nicolas Pitre, Tyler Hicks, Amir Goldstein,
	John Paul Adrian Glaubitz, Yangtao Li, Mikulas Patocka,
	David Woodhouse, Richard Weinberger, Dave Kleikamp,
	Konstantin Komarov, Mark Fasheh, Joel Becker, Joseph Qi,
	Mike Marshall, Martin Brandenburg, Miklos Szeredi, Anders Larsen,
	Zhihao Cheng, Damien Le Moal, Naohiro Aota, Johannes Thumshirn,
	John Johansen, Paul Moore, James Morris, Serge E. Hallyn,
	Mimi Zohar, Roberto Sassu, Dmitry Kasatkin, Eric Snowberg, Fan Wu,
	Stephen Smalley, Ondrej Mosnacek, Casey Schaufler, Alex Deucher,
	Christian König, David Airlie, Simona Vetter, Sumit Semwal,
	Eric Dumazet, Kuniyuki Iwashima, Paolo Abeni, Willem de Bruijn,
	David S. Miller, Jakub Kicinski, Simon Horman, Oleg Nesterov,
	Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
	Namhyung Kim, Mark Rutland, Alexander Shishkin, Jiri Olsa,
	Ian Rogers, Adrian Hunter, James Clark, Martin Schiller,
	Eric Paris, Joerg Reuter, Marcel Holtmann, Johan Hedberg,
	Luiz Augusto von Dentz, Oliver Hartkopp, Marc Kleine-Budde,
	David Ahern, Neal Cardwell, Steffen Klassert, Herbert Xu,
	Remi Denis-Courmont, Marcelo Ricardo Leitner, Xin Long,
	Magnus Karlsson, Maciej Fijalkowski, Stanislav Fomichev,
	Alexei Starovoitov, Daniel Borkmann, Jesper Dangaard Brouer,
	John Fastabend, linux-fsdevel, linux-kernel, linux-trace-kernel,
	nvdimm, fsverity, linux-mm, netfs, linux-ext4, linux-f2fs-devel,
	linux-nfs, linux-cifs, samba-technical, linux-nilfs, v9fs,
	linux-afs, autofs, ceph-devel, codalist, ecryptfs, linux-mtd,
	jfs-discussion, ntfs3, ocfs2-devel, devel, linux-unionfs,
	apparmor, linux-security-module, linux-integrity, selinux,
	amd-gfx, dri-devel, linux-media, linaro-mm-sig, netdev,
	linux-perf-users, linux-fscrypt, linux-xfs, linux-hams, linux-x25,
	audit, linux-bluetooth, linux-can, linux-sctp, bpf
In-Reply-To: <4d3b9b92da613ad329b822f3f6043fa08f534451.camel@kernel.org>

On Tue, Mar 03, 2026 at 10:14:27AM -0500, Jeff Layton wrote:
> On Tue, 2026-03-03 at 06:30 -0800, Christoph Hellwig wrote:
> > On Tue, Mar 03, 2026 at 09:19:42AM -0500, Jeff Layton wrote:
> > > On Tue, 2026-03-03 at 05:59 -0800, Christoph Hellwig wrote:
> > > > On Tue, Mar 03, 2026 at 08:43:15AM -0500, Jeff Layton wrote:
> > > > > On Tue, 2026-03-03 at 05:37 -0800, Christoph Hellwig wrote:
> > > > > > On Tue, Mar 03, 2026 at 05:53:39AM -0500, Jeff Layton wrote:
> > > > > > > Like I said to Ted, this is just temporary scaffolding for the change.
> > > > > > > The PRIino macro is removed in the end. Given that, perhaps you can
> > > > > > > overlook the bikeshed's color in this instance?
> > > > > > 
> > > > > > So why add it in the first place?  
> > > > > 
> > > > > Bisectability. The first version I did of this would have broken the
> > > > > ability to bisect properly across these changes. I don't love the
> > > > > "churn" here either, but this should be cleanly bisectable.
> > > > 
> > > > What do you need to bisect in format string changes?  Splitting
> > > > every variable type change outside of the main i_ino out - sure.
> > > > But bisecting that "change to u64 in ext4" really broke ext4 and
> > > > not "change to u64" is not very useful.  Commits should do one
> > > > well defined thing.  Adding a weird transition layer for a format
> > > > thing that just gets dropped is not one well defined thing.
> > > 
> > > In the middle stages of the series, you will get warnings or errors on
> > > 32-bit hosts when i_ino's type doesn't match what the format string
> > > expects.
> > > 
> > > There are really only three options here:
> > > 
> > > 1/ Do (almost) all of the changes in one giant patch
> > > 
> > > 2/ Accept that the build may break during the interim stages
> > > 
> > > 3/ This series: using a typedef and macro to work around the breakage
> > > until the type can be changed, at the expense of some extra churn in
> > > the codebase
> > > 
> > > 3 seems like the lesser evil.
> > 
> > No, 1 is by far the least evil.  Note that it's not really almost all,
> > as all the local variables can easily and sanely be split out.  It's
> > all of the format strings, and that makes sense.  The only "regressions"
> > there are incorrect format strings which have good warnings and can
> > be fixed easily.
> 
> Well, I've done 2 and 3 already. Why not 1? :)
> 
> It's not so much the regressions that are a problem here, but the merge
> conflicts for anyone wanting to backport later patches that are near
> these format changes. Having that change broken up by subsystem makes
> it easier to handle that piecemeal later.
> 
> I think we'll be looking at close to a 1000 line patch that touches
> nearly 200 files if we go that route. Roughly:
> 
>  182 files changed, 910 insertions(+), 912 deletions(-)
> 
> There are some tracepoint changes in some of the per-subsystem patches
> that will need to be split out, so the count isn't exact, but it'll be
> fairly close.
> 
> Since Christian will probably end up taking this series, I'd like to
> get his opinion before I respin anything.

I'm kinda surprised that we suddenly started caring about the amount of
individual patches. I personally don't care either way. Do it in one
giant patch if this moves us forward. I've done 1 and 3 and what you
did. And I'd be really annoyed if during a bisect I start to get
pointless build failures because someone did 2.

^ permalink raw reply

* Re: [PATCH v3 12/18] rtla: Enforce exact match for time unit suffixes
From: Tomas Glozar @ 2026-03-04 13:57 UTC (permalink / raw)
  To: Wander Lairson Costa
  Cc: Steven Rostedt, Crystal Wood, Ivan Pravdin, Costa Shulyupin,
	John Kacur, Tiezhu Yang, Daniel Wagner,
	Daniel Bristot de Oliveira,
	open list:Real-time Linux Analysis (RTLA) tools,
	open list:Real-time Linux Analysis (RTLA) tools,
	open list:BPF [MISC]:Keyword:(?:b|_)bpf(?:b|_)
In-Reply-To: <20260115163650.118910-13-wander@redhat.com>

čt 15. 1. 2026 v 18:28 odesílatel Wander Lairson Costa
<wander@redhat.com> napsal:
>
> The parse_ns_duration() function currently uses prefix matching for
> detecting time units. This approach is problematic as it silently
> accepts malformed strings such as "100nsx" or "100us_invalid" by
> ignoring the trailing characters, leading to potential configuration
> errors.
>
> Switch to using strcmp() for suffix comparison to enforce exact matches.
> This ensures that the parser strictly validates the time unit and
> rejects any input containing invalid trailing characters, thereby
> improving the robustness of the configuration parsing.

This solution is incorrect. We need to be able to parse deadline
priority correctly, whose format includes two suffixes:

d:runtime[us|ms|s]:period[us|ms|s]
(see manpages)

and is parsed like this:

int parse_prio(char *arg, struct sched_attr *sched_param)
{
...
    switch (arg[0]) {
    case 'd':
    case 'D':
        /* d:runtime:period */
        if (strlen(arg) < 4)
            return -1;

        runtime = get_long_ns_after_colon(arg);
        if (runtime == INVALID_VAL)
            return -1;

        period = get_long_ns_after_colon(&arg[2]);
        if (period == INVALID_VAL)
            return -1;
...

Your commit breaks that:

$ rtla timerlat -P d:10ms:100ms
Invalid -P priority

Tomas


^ permalink raw reply

* Re: [PATCH v4 4/5] mm: rename zone->lock to zone->_lock
From: Dmitry Ilvokhin @ 2026-03-04 13:01 UTC (permalink / raw)
  To: SeongJae Park
  Cc: Andrew Morton, David Hildenbrand, Lorenzo Stoakes,
	Liam R. Howlett, Vlastimil Babka, Mike Rapoport,
	Suren Baghdasaryan, Michal Hocko, Axel Rasmussen, Yuanchu Xie,
	Wei Xu, Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
	Rafael J. Wysocki, Pavel Machek, Len Brown, Brendan Jackman,
	Johannes Weiner, Zi Yan, Oscar Salvador, Qi Zheng, Shakeel Butt,
	linux-kernel, linux-mm, linux-trace-kernel, linux-pm
In-Reply-To: <20260304015035.84839-1-sj@kernel.org>

On Tue, Mar 03, 2026 at 05:50:34PM -0800, SeongJae Park wrote:
> On Tue, 3 Mar 2026 14:25:55 +0000 Dmitry Ilvokhin <d@ilvokhin.com> wrote:
> 
> > On Mon, Mar 02, 2026 at 02:37:43PM -0800, Andrew Morton wrote:
> > > On Mon, 2 Mar 2026 15:10:03 +0100 "Vlastimil Babka (SUSE)" <vbabka@kernel.org> wrote:
> > > 
> > > > On 2/27/26 17:00, Dmitry Ilvokhin wrote:
> > > > > This intentionally breaks direct users of zone->lock at compile time so
> > > > > all call sites are converted to the zone lock wrappers. Without the
> > > > > rename, present and future out-of-tree code could continue using
> > > > > spin_lock(&zone->lock) and bypass the wrappers and tracing
> > > > > infrastructure.
> > > > > 
> > > > > No functional change intended.
> > > > > 
> > > > > Suggested-by: Andrew Morton <akpm@linux-foundation.org>
> > > > > Signed-off-by: Dmitry Ilvokhin <d@ilvokhin.com>
> > > > > Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
> > > > > Acked-by: SeongJae Park <sj@kernel.org>
> > > > 
> > > > I see some more instances of 'zone->lock' in comments in
> > > > include/linux/mmzone.h and under Documentation/ but otherwise LGTM.
> > > > 
> > > 
> > > I fixed (most of) that in the previous version but my fix was lost.
> > 
> > Thanks for the fixups, Andrew.
> > 
> > I still see a few 'zone->lock' references in Documentation remain on
> > mm-new. This patch cleans them up, as noted by Vlastimil.
> > 
> > I'm happy to adjust this patch if anything else needs attention.
> > 
> > From 9142d5a8b60038fa424a6033253960682e5a51f4 Mon Sep 17 00:00:00 2001
> > From: Dmitry Ilvokhin <d@ilvokhin.com>
> > Date: Tue, 3 Mar 2026 06:13:13 -0800
> > Subject: [PATCH] mm: fix remaining zone->lock references
> > 
> > Signed-off-by: Dmitry Ilvokhin <d@ilvokhin.com>
> > ---
> >  Documentation/mm/physical_memory.rst | 4 ++--
> >  Documentation/trace/events-kmem.rst  | 8 ++++----
> >  2 files changed, 6 insertions(+), 6 deletions(-)
> > 
> > diff --git a/Documentation/mm/physical_memory.rst b/Documentation/mm/physical_memory.rst
> > index b76183545e5b..e344f93515b6 100644
> > --- a/Documentation/mm/physical_memory.rst
> > +++ b/Documentation/mm/physical_memory.rst
> > @@ -500,11 +500,11 @@ General
> >  ``nr_isolate_pageblock``
> >    Number of isolated pageblocks. It is used to solve incorrect freepage counting
> >    problem due to racy retrieving migratetype of pageblock. Protected by
> > -  ``zone->lock``. Defined only when ``CONFIG_MEMORY_ISOLATION`` is enabled.
> > +  ``zone_lock``. Defined only when ``CONFIG_MEMORY_ISOLATION`` is enabled.
> 
> Dmitry's original patch [1] was doing 's/zone->lock/zone->_lock/', which aligns
> to my expectation.  But this patch is doing 's/zone->lock/zone_lock/'.  Same
> for the rest of this patch.
> 
> I was initially thinking this is just a mistake, but I also found Andrew is
> doing the same change [2], so I'm a bit confused.  Is this an intentional change?
> 
> [1] https://lore.kernel.org/d61500c5784c64e971f4d328c57639303c475f81.1772206930.git.d@ilvokhin.com
> [2] https://lore.kernel.org/20260302143743.220eed4feb36d7572fe726cc@linux-foundation.org
> 

Good catch, thanks for pointing this out, SJ.

Originally the mechanical rename was indeed zone->lock -> zone->_lock.
However, in Documentation I intentionally switched references to
zone_lock instead of zone->_lock. The reasoning is that _lock is now an
internal implementation detail, and direct access is discouraged. The
intended interface is via the zone_lock_*() / zone_unlock_*() wrappers,
so referencing zone_lock in documentation felt more appropriate than
mentioning the private struct field (zone->_lock).

That said, I agree this creates inconsistency with the mechanical
rename, and I'm happy to adjust either way: either consistently refer
to the wrapper API, or keep documentation aligned with zone->_lock.

I slightly prefer referring to the wrapper API, but don't have a strong
preference as long as we're consistent.

> 
> Thanks,
> SJ
> 
> [...]

^ permalink raw reply

* Re: [RFC PATCH 1/2] locking: add mutex_lock_nospin()
From: Peter Zijlstra @ 2026-03-04 12:41 UTC (permalink / raw)
  To: Yafang Shao
  Cc: mingo, will, boqun, longman, rostedt, mhiramat, mark.rutland,
	mathieu.desnoyers, linux-kernel, linux-trace-kernel, bpf
In-Reply-To: <CALOAHbDmmbcK-0mG=44rw3wbaTwS8drzYD-s2gg8g+rPQZNLCg@mail.gmail.com>

On Wed, Mar 04, 2026 at 07:52:06PM +0800, Yafang Shao wrote:
> On Wed, Mar 4, 2026 at 6:11 PM Peter Zijlstra <peterz@infradead.org> wrote:
> >
> > On Wed, Mar 04, 2026 at 05:37:31PM +0800, Yafang Shao wrote:
> > > On Wed, Mar 4, 2026 at 5:03 PM Peter Zijlstra <peterz@infradead.org> wrote:
> > > >
> > > > On Wed, Mar 04, 2026 at 03:46:49PM +0800, Yafang Shao wrote:
> > > > > Introduce mutex_lock_nospin(), a helper that disables optimistic spinning
> > > > > on the owner for specific heavy locks. This prevents long spinning times
> > > > > that can lead to latency spikes for other tasks on the same runqueue.
> > > >
> > > > This makes no sense; spinning stops on need_resched().
> > >
> > > Hello Peter,
> > >
> > > The condition to stop spinning on need_resched() relies on the mutex
> > > owner remaining unchanged. However, when multiple tasks contend for
> > > the same lock, the owner can change frequently. This creates a
> > > potential TOCTOU (Time of Check to Time of Use) issue.
> > >
> > >   mutex_optimistic_spin
> > >       owner = __mutex_trylock_or_owner(lock);
> > >       mutex_spin_on_owner
> > >           // the __mutex_owner(lock) might get a new owner.
> > >           while (__mutex_owner(lock) == owner)
> > >
> >
> > How do these new owners become the owner? Are they succeeding the
> > __mutex_trylock() that sits before mutex_optimistic_spin() and
> > effectively starving the spinner?
> >
> > Something like the below would make a difference if that were so.
> 
> The following change made no difference; concurrent runs still result
> in prolonged system time.
> 
> real 0m5.265s user 0m0.000s sys 0m4.921s
> real 0m5.295s user 0m0.002s sys 0m4.697s
> real 0m5.293s user 0m0.003s sys 0m4.844s
> real 0m5.303s user 0m0.001s sys 0m4.511s
> real 0m5.303s user 0m0.000s sys 0m4.694s
> real 0m5.302s user 0m0.002s sys 0m4.677s
> real 0m5.313s user 0m0.000s sys 0m4.837s
> real 0m5.327s user 0m0.000s sys 0m4.808s
> real 0m5.330s user 0m0.001s sys 0m4.893s
> real 0m5.358s user 0m0.005s sys 0m4.919s
> 
> Our kernel is not built with CONFIG_PREEMPT enabled, so prolonged
> system time can lead to CPU pressure and potential latency spikes.
> Since we can reliably reproduce this unnecessary spinning, why not
> improve it to reduce the overhead?

If you cannot explain what the problem is (you haven't), there is
nothing to fix.

Also, current kernels cannot be built without PREEMPT; and if you care
about latency running a PREEMPT=n kernel is daft. That said,
TIF_NEED_RESCHED should work irrespective of PREEMPT settings, the
PREEMPT settings just affect when and how you end up in schedule().

Even without PREEMPT, if there is a task waiting, either the wakeup or
the tick will set TIF_NEED_RESCHED and it should stop spinning. If there
is no task waiting, there is no actual latency, just burning CPU time,
and that isn't a problem per-se.

What should happen is that the first spinner gets the lock next, the
next spinner is then promoted to first spinner and so on.

This chain continues, which means the lock owner is always
on-cpu and good progress is being made and there is no CPU contention,
or the spinner gets marked for preemption (as said, this does not
require PREEMPT=y) and will stop spinning and go sleep, or the owner
goes to sleep and all the spinners stop and also go sleep.

Again, you have not said anything specific enough to figure out what
happens on your end. You said the owner changes, this means there is
progress made. What isn't clear is if any one particular spinner is
starved (that would be a problem) or if this latency spike you observe
is worse than would be from running a while(1) loop, in which case,
that's just how it is.

What is not sane, is marking random locks with random properties just
because of some random workload.

^ permalink raw reply

* Re: [RFC PATCH 1/2] locking: add mutex_lock_nospin()
From: Yafang Shao @ 2026-03-04 11:52 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: mingo, will, boqun, longman, rostedt, mhiramat, mark.rutland,
	mathieu.desnoyers, linux-kernel, linux-trace-kernel, bpf
In-Reply-To: <20260304101111.GQ606826@noisy.programming.kicks-ass.net>

On Wed, Mar 4, 2026 at 6:11 PM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Wed, Mar 04, 2026 at 05:37:31PM +0800, Yafang Shao wrote:
> > On Wed, Mar 4, 2026 at 5:03 PM Peter Zijlstra <peterz@infradead.org> wrote:
> > >
> > > On Wed, Mar 04, 2026 at 03:46:49PM +0800, Yafang Shao wrote:
> > > > Introduce mutex_lock_nospin(), a helper that disables optimistic spinning
> > > > on the owner for specific heavy locks. This prevents long spinning times
> > > > that can lead to latency spikes for other tasks on the same runqueue.
> > >
> > > This makes no sense; spinning stops on need_resched().
> >
> > Hello Peter,
> >
> > The condition to stop spinning on need_resched() relies on the mutex
> > owner remaining unchanged. However, when multiple tasks contend for
> > the same lock, the owner can change frequently. This creates a
> > potential TOCTOU (Time of Check to Time of Use) issue.
> >
> >   mutex_optimistic_spin
> >       owner = __mutex_trylock_or_owner(lock);
> >       mutex_spin_on_owner
> >           // the __mutex_owner(lock) might get a new owner.
> >           while (__mutex_owner(lock) == owner)
> >
>
> How do these new owners become the owner? Are they succeeding the
> __mutex_trylock() that sits before mutex_optimistic_spin() and
> effectively starving the spinner?
>
> Something like the below would make a difference if that were so.

The following change made no difference; concurrent runs still result
in prolonged system time.

real 0m5.265s user 0m0.000s sys 0m4.921s
real 0m5.295s user 0m0.002s sys 0m4.697s
real 0m5.293s user 0m0.003s sys 0m4.844s
real 0m5.303s user 0m0.001s sys 0m4.511s
real 0m5.303s user 0m0.000s sys 0m4.694s
real 0m5.302s user 0m0.002s sys 0m4.677s
real 0m5.313s user 0m0.000s sys 0m4.837s
real 0m5.327s user 0m0.000s sys 0m4.808s
real 0m5.330s user 0m0.001s sys 0m4.893s
real 0m5.358s user 0m0.005s sys 0m4.919s

Our kernel is not built with CONFIG_PREEMPT enabled, so prolonged
system time can lead to CPU pressure and potential latency spikes.
Since we can reliably reproduce this unnecessary spinning, why not
improve it to reduce the overhead?

-- 
Regards
Yafang

^ permalink raw reply

* Re: [PATCH v3 16/18] rtla/trace: Fix I/O handling in save_trace_to_file()
From: Tomas Glozar @ 2026-03-04 10:30 UTC (permalink / raw)
  To: Wander Lairson Costa
  Cc: Steven Rostedt, Ivan Pravdin, Crystal Wood, Costa Shulyupin,
	John Kacur, Haiyong Sun, Tiezhu Yang, Daniel Wagner,
	Daniel Bristot de Oliveira,
	open list:Real-time Linux Analysis (RTLA) tools,
	open list:Real-time Linux Analysis (RTLA) tools,
	open list:BPF [MISC]:Keyword:(?:b|_)bpf(?:b|_)
In-Reply-To: <20260115163650.118910-17-wander@redhat.com>

čt 15. 1. 2026 v 18:29 odesílatel Wander Lairson Costa
<wander@redhat.com> napsal:
> diff --git a/tools/tracing/rtla/src/trace.c b/tools/tracing/rtla/src/trace.c
> index fed3362527b08..8e93b48d33ef8 100644
> --- a/tools/tracing/rtla/src/trace.c
> +++ b/tools/tracing/rtla/src/trace.c
> @@ -73,6 +73,7 @@ int save_trace_to_file(struct tracefs_instance *inst, const char *filename)
>         char buffer[4096];
>         int out_fd, in_fd;
>         int retval = -1;
> +       ssize_t n_read;
>
>         if (!inst || !filename)
>                 return 0;
> @@ -90,15 +91,30 @@ int save_trace_to_file(struct tracefs_instance *inst, const char *filename)
>                 goto out_close_in;
>         }
>
> -       do {
> -               retval = read(in_fd, buffer, sizeof(buffer));
> -               if (retval <= 0)
> +       for (;;) {
> +               n_read = read(in_fd, buffer, sizeof(buffer));
> +               if (n_read < 0) {
> +                       if (errno == EINTR)
> +                               continue;
> +                       err_msg("Error reading trace file: %s\n", strerror(errno));
>                         goto out_close;
> +               }
> +               if (n_read == 0)
> +                       break;
>
> -               retval = write(out_fd, buffer, retval);
> -               if (retval < 0)
> -                       goto out_close;
> -       } while (retval > 0);
> +               ssize_t n_written = 0;

Why break the style of declaring all variables at the beginning of the
function? n_read, added in the same commit, keeps the style.

This also applies to the previous patch.

Tomas


^ permalink raw reply

* Re: [RFC PATCH 1/2] locking: add mutex_lock_nospin()
From: Peter Zijlstra @ 2026-03-04 10:11 UTC (permalink / raw)
  To: Yafang Shao
  Cc: mingo, will, boqun, longman, rostedt, mhiramat, mark.rutland,
	mathieu.desnoyers, linux-kernel, linux-trace-kernel, bpf
In-Reply-To: <CALOAHbC1AHc2eZrzafz7ynrW7NFPrVmervCY_jjWsRTo0gBwQQ@mail.gmail.com>

On Wed, Mar 04, 2026 at 05:37:31PM +0800, Yafang Shao wrote:
> On Wed, Mar 4, 2026 at 5:03 PM Peter Zijlstra <peterz@infradead.org> wrote:
> >
> > On Wed, Mar 04, 2026 at 03:46:49PM +0800, Yafang Shao wrote:
> > > Introduce mutex_lock_nospin(), a helper that disables optimistic spinning
> > > on the owner for specific heavy locks. This prevents long spinning times
> > > that can lead to latency spikes for other tasks on the same runqueue.
> >
> > This makes no sense; spinning stops on need_resched().
> 
> Hello Peter,
> 
> The condition to stop spinning on need_resched() relies on the mutex
> owner remaining unchanged. However, when multiple tasks contend for
> the same lock, the owner can change frequently. This creates a
> potential TOCTOU (Time of Check to Time of Use) issue.
> 
>   mutex_optimistic_spin
>       owner = __mutex_trylock_or_owner(lock);
>       mutex_spin_on_owner
>           // the __mutex_owner(lock) might get a new owner.
>           while (__mutex_owner(lock) == owner)
> 

How do these new owners become the owner? Are they succeeding the
__mutex_trylock() that sits before mutex_optimistic_spin() and
effectively starving the spinner?

Something like the below would make a difference if that were so.

---
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index c867f6c15530..0796e77a8c3b 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -521,7 +521,7 @@ static __always_inline bool
 mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx,
 		      struct mutex_waiter *waiter)
 {
-	return false;
+	return __mutex_trylock(lock);
 }
 #endif
 
@@ -614,8 +614,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
 	mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);
 
 	trace_contention_begin(lock, LCB_F_MUTEX | LCB_F_SPIN);
-	if (__mutex_trylock(lock) ||
-	    mutex_optimistic_spin(lock, ww_ctx, NULL)) {
+	if (mutex_optimistic_spin(lock, ww_ctx, NULL)) {
 		/* got the lock, yay! */
 		lock_acquired(&lock->dep_map, ip);
 		if (ww_ctx)


^ permalink raw reply related

* Re: [RFC PATCH 1/2] locking: add mutex_lock_nospin()
From: David Laight @ 2026-03-04  9:54 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Yafang Shao, mingo, will, boqun, longman, rostedt, mhiramat,
	mark.rutland, mathieu.desnoyers, linux-kernel, linux-trace-kernel,
	bpf
In-Reply-To: <20260304090249.GN606826@noisy.programming.kicks-ass.net>

On Wed, 4 Mar 2026 10:02:49 +0100
Peter Zijlstra <peterz@infradead.org> wrote:

> On Wed, Mar 04, 2026 at 03:46:49PM +0800, Yafang Shao wrote:
> > Introduce mutex_lock_nospin(), a helper that disables optimistic spinning
> > on the owner for specific heavy locks. This prevents long spinning times
> > that can lead to latency spikes for other tasks on the same runqueue.  
> 
> This makes no sense; spinning stops on need_resched().
> 

That might still be an issue if a high priority process is spinning.
But a %sys spike doesn't imply a latency spike.

Is this using the osq_lock.c code?
That will have problems on overprovisioned VMs, it tries to find out
whether the hypervisor has switched out - but ISTR that is flawed.

In reality a spin lock shouldn't be held for long enough to cause
any kind of latency issue.
So something in the code that reads the list of filter functions
needs to be done differently so that the lock isn't held for as long.

	David

^ permalink raw reply

* Re: [RFC PATCH 1/2] locking: add mutex_lock_nospin()
From: Yafang Shao @ 2026-03-04  9:37 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: mingo, will, boqun, longman, rostedt, mhiramat, mark.rutland,
	mathieu.desnoyers, linux-kernel, linux-trace-kernel, bpf
In-Reply-To: <20260304090249.GN606826@noisy.programming.kicks-ass.net>

On Wed, Mar 4, 2026 at 5:03 PM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Wed, Mar 04, 2026 at 03:46:49PM +0800, Yafang Shao wrote:
> > Introduce mutex_lock_nospin(), a helper that disables optimistic spinning
> > on the owner for specific heavy locks. This prevents long spinning times
> > that can lead to latency spikes for other tasks on the same runqueue.
>
> This makes no sense; spinning stops on need_resched().

Hello Peter,

The condition to stop spinning on need_resched() relies on the mutex
owner remaining unchanged. However, when multiple tasks contend for
the same lock, the owner can change frequently. This creates a
potential TOCTOU (Time of Check to Time of Use) issue.

  mutex_optimistic_spin
      owner = __mutex_trylock_or_owner(lock);
      mutex_spin_on_owner
          // the __mutex_owner(lock) might get a new owner.
          while (__mutex_owner(lock) == owner)

We observed high CPU pressure in production when this scenario occurred.

Below are the benchmark results when running 10 concurrent tasks:

for i in `seq 0 9`; do
        time cat /sys/kernel/debug/tracing/available_filter_functions
> /dev/null &
done

- before this patch

real    0m4.636s    user 0m0.001s    sys 0m3.773s
real    0m5.157s    user 0m0.001s    sys 0m4.362s
real    0m5.205s    user 0m0.000s    sys 0m4.538s
real    0m5.212s    user 0m0.001s    sys 0m4.700s
real    0m5.246s    user 0m0.001s    sys 0m4.501s
real    0m5.254s    user 0m0.003s    sys 0m4.335s
real    0m5.260s    user 0m0.003s    sys 0m4.525s
real    0m5.267s    user 0m0.004s    sys 0m4.482s
real    0m5.273s    user 0m0.002s    sys 0m4.215s
real    0m5.285s    user 0m0.003s    sys 0m4.373s


- after this patch

real    0m4.733s    user 0m0.002s    sys 0m0.511s
real    0m4.740s    user 0m0.001s    sys 0m0.509s
real    0m4.862s    user 0m0.001s    sys 0m0.513s
real    0m4.884s    user 0m0.000s    sys 0m0.507s
real    0m4.888s    user 0m0.003s    sys 0m0.513s
real    0m4.888s    user 0m0.000s    sys 0m0.511s
real    0m4.886s    user 0m0.003s    sys 0m0.508s
real    0m4.952s    user 0m0.000s    sys 0m0.513s
real    0m4.973s    user 0m0.001s    sys 0m0.510s
real    0m5.042s    user 0m0.002s    sys 0m0.515s

The results show that system time dropped dramatically from ~4.5
seconds to ~0.5 seconds, confirming that the patch can help reduce the
issue.

Please correct me if I've misunderstood anything.

-- 
Regards
Yafang

^ permalink raw reply

* Re: [PATCH v2 000/110] vfs: change inode->i_ino from unsigned long to u64
From: David Laight @ 2026-03-04  9:30 UTC (permalink / raw)
  To: NeilBrown
  Cc: Jeff Layton, linux-fsdevel, linux-kernel, linux-trace-kernel,
	nvdimm, fsverity, linux-mm, netfs, linux-ext4, linux-f2fs-devel,
	linux-nfs, linux-cifs, samba-technical, linux-nilfs, v9fs,
	linux-afs, autofs, ceph-devel, codalist, ecryptfs, linux-mtd,
	jfs-discussion, ntfs3, ocfs2-devel, devel, linux-unionfs,
	apparmor, linux-security-module, linux-integrity, selinux,
	amd-gfx, dri-devel, linux-media, linaro-mm-sig, netdev,
	linux-perf-users, linux-fscrypt, linux-xfs, linux-hams, linux-x25,
	audit, linux-bluetooth, linux-can, linux-sctp, bpf
In-Reply-To: <177260561903.7472.14075475865748618717@noble.neil.brown.name>

On Wed, 04 Mar 2026 17:26:59 +1100
NeilBrown <neilb@ownmail.net> wrote:

> On Tue, 03 Mar 2026, Jeff Layton wrote:
> > On Tue, 2026-03-03 at 10:55 +0000, David Howells wrote:  
> > > Jeff Layton <jlayton@kernel.org> wrote:
> > >   
> > > > This version splits the change up to be more bisectable. It first adds a
> > > > new kino_t typedef and a new "PRIino" macro to hold the width specifier
> > > > for format strings. The conversion is done, and then everything is
> > > > changed to remove the new macro and typedef.  
> > > 
> > > Why remove the typedef?  It might be better to keep it.
> > >   
> > 
> > > Why? After this change, internal kernel inodes will be u64's -- full
> > stop. I don't see what the macro or typedef will buy us at that point.  
> 
> Implicit documentation?
> ktime_t is (now) always s64, but we still keep the typedef;
> 
> It would be cool if we could teach vsprintf to understand some new
> specifier to mean "kinode_t" or "ktime_t" etc.  But that would trigger
> gcc warnings.

A more interesting one would be something that made gcc re-write the
format with the correct 'length modifier' for the parameter.

That would save a lot of effort!

	David

> 
> NeilBrown
> 


^ permalink raw reply

* [PATCH] kprobes: Remove unneeded warnings from __arm_kprobe_ftrace()
From: Masami Hiramatsu (Google) @ 2026-03-04  9:08 UTC (permalink / raw)
  To: Naveen N Rao, Masami Hiramatsu, Steven Rostedt
  Cc: Zw Tang, linux-kernel, linux-trace-kernel, linux-perf-users,
	Arnaldo Carvalho de Melo, Sasha Levin, David S . Miller

From: Masami Hiramatsu (Google) <mhiramat@kernel.org>

Remove unneeded warnings for handled errors from __arm_kprobe_ftrace()
because all callers handle the error correctly.

Reported-by: Zw Tang <shicenci@gmail.com>
Closes: https://lore.kernel.org/all/CAPHJ_V+J6YDb_wX2nhXU6kh466Dt_nyDSas-1i_Y8s7tqY-Mzw@mail.gmail.com/
Fixes: 9c89bb8e3272 ("kprobes: treewide: Cleanup the error messages for kprobes")
Cc: stable@vger.kernel.org
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
---
 kernel/kprobes.c |    4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index ab25b4aa9095..2c2b7bd24dd4 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1144,12 +1144,12 @@ static int __arm_kprobe_ftrace(struct kprobe *p, struct ftrace_ops *ops,
 	lockdep_assert_held(&kprobe_mutex);
 
 	ret = ftrace_set_filter_ip(ops, (unsigned long)p->addr, 0, 0);
-	if (WARN_ONCE(ret < 0, "Failed to arm kprobe-ftrace at %pS (error %d)\n", p->addr, ret))
+	if (ret < 0)
 		return ret;
 
 	if (*cnt == 0) {
 		ret = register_ftrace_function(ops);
-		if (WARN(ret < 0, "Failed to register kprobe-ftrace (error %d)\n", ret)) {
+		if (ret < 0) {
 			/*
 			 * At this point, sinec ops is not registered, we should be sefe from
 			 * registering empty filter.


^ permalink raw reply related

* Re: [PATCH 1/2] tracing: Have futex syscall trace event show specific user data
From: Peter Zijlstra @ 2026-03-04  9:07 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: linux-kernel, linux-trace-kernel, Masami Hiramatsu, Mark Rutland,
	Mathieu Desnoyers, Andrew Morton, Thomas Gleixner, Brian Geffon,
	John Stultz, Ian Rogers, Suleiman Souhlal
In-Reply-To: <20260303214942.428502100@kernel.org>

On Tue, Mar 03, 2026 at 04:47:36PM -0500, Steven Rostedt wrote:
> From: Steven Rostedt <rostedt@goodmis.org>
> 
> Add specific reporting of the futex system call. This allows for debugging
> the futex code a bit easier. Instead of just showing the values passed
> into the futex system call, read the value of the user space memory
> pointed to by the addr parameter.
> 
> Also make the op parameter more readable by parsing the values to show
> what the command is:
> 
>  futex_requeue_p-3251    [002] .....  2101.068479: sys_futex(uaddr: 0x55e79a4da834 (0x80000cb1), FUTEX_LOCK_PI|FUTEX_PRIVATE_FLAG, val: 0)
>  futex_requeue_p-3248    [001] .....  2101.068970: sys_futex(uaddr: 0x7f859072f990 (0xcb2), FUTEX_WAIT_BITSET|FUTEX_CLOCK_REALTIME, val: 3250)
>  futex_requeue_p-3252    [005] .....  2101.069108: sys_futex(uaddr: 0x55e79a4da838 (0), FUTEX_WAIT_REQUEUE_PI|FUTEX_PRIVATE_FLAG, val: 0, timespec: 0x7ffe61076aa0, uaddr2: 0x55e79a4da834, uaddr2: 94453214586932, val3: 0)
>  futex_requeue_p-3252    [005] .....  2101.069410: sys_futex(uaddr: 0x55e79a4da834 (0x80000cb1), FUTEX_LOCK_PI|FUTEX_PRIVATE_FLAG, val: 0)
> 
> Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
> ---
>  kernel/trace/trace_syscalls.c | 266 +++++++++++++++++++++++++++++++++-
>  1 file changed, 263 insertions(+), 3 deletions(-)

Egads, I really dislike how all sorts of syscall crud is 'duplicated' in
this file, rather than near or in the actual syscall definition.


^ permalink raw reply

* Re: [RFC PATCH 1/2] locking: add mutex_lock_nospin()
From: Peter Zijlstra @ 2026-03-04  9:02 UTC (permalink / raw)
  To: Yafang Shao
  Cc: mingo, will, boqun, longman, rostedt, mhiramat, mark.rutland,
	mathieu.desnoyers, linux-kernel, linux-trace-kernel, bpf
In-Reply-To: <20260304074650.58165-2-laoar.shao@gmail.com>

On Wed, Mar 04, 2026 at 03:46:49PM +0800, Yafang Shao wrote:
> Introduce mutex_lock_nospin(), a helper that disables optimistic spinning
> on the owner for specific heavy locks. This prevents long spinning times
> that can lead to latency spikes for other tasks on the same runqueue.

This makes no sense; spinning stops on need_resched().

^ permalink raw reply

* Re: [syzbot] [bpf?] [trace?] KASAN: slab-use-after-free Read in bpf_trace_run3 (2)
From: Qing Wang @ 2026-03-04  8:07 UTC (permalink / raw)
  To: syzbot+9ea7c90be2b24e189592
  Cc: andrii, ast, bpf, daniel, eddyz87, haoluo, john.fastabend, jolsa,
	kpsingh, linux-kernel, linux-trace-kernel, martin.lau,
	mathieu.desnoyers, mattbobrowski, mhiramat, rostedt, sdf, song,
	syzkaller-bugs, wangqing7171, yonghong.song
In-Reply-To: <69a7e4df.050a0220.21ae90.0014.GAE@google.com>

On Wed, 04 Mar 2026 at 15:53, syzbot <syzbot+9ea7c90be2b24e189592@syzkaller.appspotmail.com> wrote:
> Hello,
> 
> syzbot has tested the proposed patch and the reproducer did not trigger any issue:
> 
> Reported-by: syzbot+9ea7c90be2b24e189592@syzkaller.appspotmail.com
> Tested-by: syzbot+9ea7c90be2b24e189592@syzkaller.appspotmail.com
> 
> Tested on:
> 
> commit:         0031c068 Merge tag 'cgroup-for-7.0-rc2-fixes' of git:/..
> git tree:       upstream
> console output: https://syzkaller.appspot.com/x/log.txt?x=12afb006580000
> kernel config:  https://syzkaller.appspot.com/x/.config?x=c5c49ee0942d1cdb
> dashboard link: https://syzkaller.appspot.com/bug?extid=9ea7c90be2b24e189592
> compiler:       Debian clang version 21.1.8 (++20251221033036+2078da43e25a-1~exp1~20251221153213.50), Debian LLD 21.1.8
> patch:          https://syzkaller.appspot.com/x/patch.diff?x=1566e552580000
> 
> Note: testing is done by a robot and is best-effort only.

It's the same as [0].
 [0] https://syzkaller.appspot.com/bug?extid=b4c5ad098c821bf8d8bc

I sent the fixed patch [1] to [0].
 [1] https://lore.kernel.org/all/20260304070927.178464-1-wangqing7171@gmail.com/T/

--
Qing

^ permalink raw reply

* Re: [syzbot] [bpf?] [trace?] KASAN: slab-use-after-free Read in bpf_trace_run3 (2)
From: syzbot @ 2026-03-04  7:53 UTC (permalink / raw)
  To: andrii, ast, bpf, daniel, eddyz87, haoluo, john.fastabend, jolsa,
	kpsingh, linux-kernel, linux-trace-kernel, martin.lau,
	mathieu.desnoyers, mattbobrowski, mhiramat, rostedt, sdf, song,
	syzkaller-bugs, wangqing7171, yonghong.song
In-Reply-To: <20260304072616.185060-1-wangqing7171@gmail.com>

Hello,

syzbot has tested the proposed patch and the reproducer did not trigger any issue:

Reported-by: syzbot+9ea7c90be2b24e189592@syzkaller.appspotmail.com
Tested-by: syzbot+9ea7c90be2b24e189592@syzkaller.appspotmail.com

Tested on:

commit:         0031c068 Merge tag 'cgroup-for-7.0-rc2-fixes' of git:/..
git tree:       upstream
console output: https://syzkaller.appspot.com/x/log.txt?x=12afb006580000
kernel config:  https://syzkaller.appspot.com/x/.config?x=c5c49ee0942d1cdb
dashboard link: https://syzkaller.appspot.com/bug?extid=9ea7c90be2b24e189592
compiler:       Debian clang version 21.1.8 (++20251221033036+2078da43e25a-1~exp1~20251221153213.50), Debian LLD 21.1.8
patch:          https://syzkaller.appspot.com/x/patch.diff?x=1566e552580000

Note: testing is done by a robot and is best-effort only.

^ permalink raw reply

* [RFC PATCH 2/2] ftrace: disable optimistic spinning for ftrace_lock
From: Yafang Shao @ 2026-03-04  7:46 UTC (permalink / raw)
  To: peterz, mingo, will, boqun, longman, rostedt, mhiramat,
	mark.rutland, mathieu.desnoyers
  Cc: linux-kernel, linux-trace-kernel, bpf, Yafang Shao
In-Reply-To: <20260304074650.58165-1-laoar.shao@gmail.com>

mutex_lock_nospin() is used for ftrace_lock to selectively disable
optimistic spinning.

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
---
 kernel/trace/ftrace.c | 52 +++++++++++++++++++++----------------------
 1 file changed, 26 insertions(+), 26 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 827fb9a0bf0d..b8cca4f76118 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1284,7 +1284,7 @@ static void clear_ftrace_mod_list(struct list_head *head)
 	if (!head)
 		return;
 
-	mutex_lock(&ftrace_lock);
+	mutex_lock_nospin(&ftrace_lock);
 	list_for_each_entry_safe(p, n, head, list)
 		free_ftrace_mod(p);
 	mutex_unlock(&ftrace_lock);
@@ -4254,7 +4254,7 @@ static void *t_start(struct seq_file *m, loff_t *pos)
 	void *p = NULL;
 	loff_t l;
 
-	mutex_lock(&ftrace_lock);
+	mutex_lock_nospin(&ftrace_lock);
 
 	if (unlikely(ftrace_disabled))
 		return NULL;
@@ -4362,7 +4362,7 @@ static __init void ftrace_check_work_func(struct work_struct *work)
 	struct ftrace_page *pg;
 	struct dyn_ftrace *rec;
 
-	mutex_lock(&ftrace_lock);
+	mutex_lock_nospin(&ftrace_lock);
 	do_for_each_ftrace_rec(pg, rec) {
 		test_for_valid_rec(rec);
 	} while_for_each_ftrace_rec();
@@ -5123,7 +5123,7 @@ static void process_mod_list(struct list_head *head, struct ftrace_ops *ops,
 	if (!new_hash)
 		goto out; /* warn? */
 
-	mutex_lock(&ftrace_lock);
+	mutex_lock_nospin(&ftrace_lock);
 
 	list_for_each_entry_safe(ftrace_mod, n, head, list) {
 
@@ -5159,7 +5159,7 @@ static void process_mod_list(struct list_head *head, struct ftrace_ops *ops,
 	if (enable && list_empty(head))
 		new_hash->flags &= ~FTRACE_HASH_FL_MOD;
 
-	mutex_lock(&ftrace_lock);
+	mutex_lock_nospin(&ftrace_lock);
 
 	ftrace_hash_move_and_update_ops(ops, orig_hash,
 					      new_hash, enable);
@@ -5465,7 +5465,7 @@ register_ftrace_function_probe(char *glob, struct trace_array *tr,
 		return -EINVAL;
 
 
-	mutex_lock(&ftrace_lock);
+	mutex_lock_nospin(&ftrace_lock);
 	/* Check if the probe_ops is already registered */
 	list_for_each_entry(iter, &tr->func_probes, list) {
 		if (iter->probe_ops == probe_ops) {
@@ -5540,7 +5540,7 @@ register_ftrace_function_probe(char *glob, struct trace_array *tr,
 		}
 	}
 
-	mutex_lock(&ftrace_lock);
+	mutex_lock_nospin(&ftrace_lock);
 
 	if (!count) {
 		/* Nothing was added? */
@@ -5619,7 +5619,7 @@ unregister_ftrace_function_probe_func(char *glob, struct trace_array *tr,
 			return -EINVAL;
 	}
 
-	mutex_lock(&ftrace_lock);
+	mutex_lock_nospin(&ftrace_lock);
 	/* Check if the probe_ops is already registered */
 	list_for_each_entry(iter, &tr->func_probes, list) {
 		if (iter->probe_ops == probe_ops) {
@@ -5679,7 +5679,7 @@ unregister_ftrace_function_probe_func(char *glob, struct trace_array *tr,
 		goto out_unlock;
 	}
 
-	mutex_lock(&ftrace_lock);
+	mutex_lock_nospin(&ftrace_lock);
 
 	WARN_ON(probe->ref < count);
 
@@ -5943,7 +5943,7 @@ ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len,
 			goto out_regex_unlock;
 	}
 
-	mutex_lock(&ftrace_lock);
+	mutex_lock_nospin(&ftrace_lock);
 	ret = ftrace_hash_move_and_update_ops(ops, orig_hash, hash, enable);
 	mutex_unlock(&ftrace_lock);
 
@@ -6205,7 +6205,7 @@ __modify_ftrace_direct(struct ftrace_ops *ops, unsigned long addr)
 	 * Now the ftrace_ops_list_func() is called to do the direct callers.
 	 * We can safely change the direct functions attached to each entry.
 	 */
-	mutex_lock(&ftrace_lock);
+	mutex_lock_nospin(&ftrace_lock);
 
 	size = 1 << hash->size_bits;
 	for (i = 0; i < size; i++) {
@@ -6625,7 +6625,7 @@ int update_ftrace_direct_mod(struct ftrace_ops *ops, struct ftrace_hash *hash, b
 	 * Now the ftrace_ops_list_func() is called to do the direct callers.
 	 * We can safely change the direct functions attached to each entry.
 	 */
-	mutex_lock(&ftrace_lock);
+	mutex_lock_nospin(&ftrace_lock);
 
 	size = 1 << hash->size_bits;
 	for (i = 0; i < size; i++) {
@@ -6980,7 +6980,7 @@ int ftrace_regex_release(struct inode *inode, struct file *file)
 		} else
 			orig_hash = &iter->ops->func_hash->notrace_hash;
 
-		mutex_lock(&ftrace_lock);
+		mutex_lock_nospin(&ftrace_lock);
 		ftrace_hash_move_and_update_ops(iter->ops, orig_hash,
 						      iter->hash, filter_hash);
 		mutex_unlock(&ftrace_lock);
@@ -7464,7 +7464,7 @@ void ftrace_create_filter_files(struct ftrace_ops *ops,
  */
 void ftrace_destroy_filter_files(struct ftrace_ops *ops)
 {
-	mutex_lock(&ftrace_lock);
+	mutex_lock_nospin(&ftrace_lock);
 	if (ops->flags & FTRACE_OPS_FL_ENABLED)
 		ftrace_shutdown(ops, 0);
 	ops->flags |= FTRACE_OPS_FL_DELETED;
@@ -7571,7 +7571,7 @@ static int ftrace_process_locs(struct module *mod,
 	if (!start_pg)
 		return -ENOMEM;
 
-	mutex_lock(&ftrace_lock);
+	mutex_lock_nospin(&ftrace_lock);
 
 	/*
 	 * Core and each module needs their own pages, as
@@ -7868,7 +7868,7 @@ void ftrace_release_mod(struct module *mod)
 	struct ftrace_page *tmp_page = NULL;
 	struct ftrace_page *pg;
 
-	mutex_lock(&ftrace_lock);
+	mutex_lock_nospin(&ftrace_lock);
 
 	/*
 	 * To avoid the UAF problem after the module is unloaded, the
@@ -7938,7 +7938,7 @@ void ftrace_module_enable(struct module *mod)
 	struct dyn_ftrace *rec;
 	struct ftrace_page *pg;
 
-	mutex_lock(&ftrace_lock);
+	mutex_lock_nospin(&ftrace_lock);
 
 	if (ftrace_disabled)
 		goto out_unlock;
@@ -8267,7 +8267,7 @@ void ftrace_free_mem(struct module *mod, void *start_ptr, void *end_ptr)
 	key.ip = start;
 	key.flags = end;	/* overload flags, as it is unsigned long */
 
-	mutex_lock(&ftrace_lock);
+	mutex_lock_nospin(&ftrace_lock);
 
 	/*
 	 * If we are freeing module init memory, then check if
@@ -8686,7 +8686,7 @@ static void clear_ftrace_pids(struct trace_array *tr, int type)
 
 void ftrace_clear_pids(struct trace_array *tr)
 {
-	mutex_lock(&ftrace_lock);
+	mutex_lock_nospin(&ftrace_lock);
 
 	clear_ftrace_pids(tr, TRACE_PIDS | TRACE_NO_PIDS);
 
@@ -8695,7 +8695,7 @@ void ftrace_clear_pids(struct trace_array *tr)
 
 static void ftrace_pid_reset(struct trace_array *tr, int type)
 {
-	mutex_lock(&ftrace_lock);
+	mutex_lock_nospin(&ftrace_lock);
 	clear_ftrace_pids(tr, type);
 
 	ftrace_update_pid_func();
@@ -8713,7 +8713,7 @@ static void *fpid_start(struct seq_file *m, loff_t *pos)
 	struct trace_pid_list *pid_list;
 	struct trace_array *tr = m->private;
 
-	mutex_lock(&ftrace_lock);
+	mutex_lock_nospin(&ftrace_lock);
 	rcu_read_lock_sched();
 
 	pid_list = rcu_dereference_sched(tr->function_pids);
@@ -8766,7 +8766,7 @@ static void *fnpid_start(struct seq_file *m, loff_t *pos)
 	struct trace_pid_list *pid_list;
 	struct trace_array *tr = m->private;
 
-	mutex_lock(&ftrace_lock);
+	mutex_lock_nospin(&ftrace_lock);
 	rcu_read_lock_sched();
 
 	pid_list = rcu_dereference_sched(tr->function_no_pids);
@@ -9057,7 +9057,7 @@ static int prepare_direct_functions_for_ipmodify(struct ftrace_ops *ops)
 			unsigned long ip = entry->ip;
 			bool found_op = false;
 
-			mutex_lock(&ftrace_lock);
+			mutex_lock_nospin(&ftrace_lock);
 			do_for_each_ftrace_op(op, ftrace_ops_list) {
 				if (!(op->flags & FTRACE_OPS_FL_DIRECT))
 					continue;
@@ -9106,7 +9106,7 @@ static void cleanup_direct_functions_after_ipmodify(struct ftrace_ops *ops)
 			unsigned long ip = entry->ip;
 			bool found_op = false;
 
-			mutex_lock(&ftrace_lock);
+			mutex_lock_nospin(&ftrace_lock);
 			do_for_each_ftrace_op(op, ftrace_ops_list) {
 				if (!(op->flags & FTRACE_OPS_FL_DIRECT))
 					continue;
@@ -9153,7 +9153,7 @@ static int register_ftrace_function_nolock(struct ftrace_ops *ops)
 
 	ftrace_ops_init(ops);
 
-	mutex_lock(&ftrace_lock);
+	mutex_lock_nospin(&ftrace_lock);
 
 	ret = ftrace_startup(ops, 0);
 
@@ -9200,7 +9200,7 @@ int unregister_ftrace_function(struct ftrace_ops *ops)
 {
 	int ret;
 
-	mutex_lock(&ftrace_lock);
+	mutex_lock_nospin(&ftrace_lock);
 	ret = ftrace_shutdown(ops, 0);
 	mutex_unlock(&ftrace_lock);
 
-- 
2.47.3


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox