Building the Linux kernel with Clang and LLVM
 help / color / mirror / Atom feed
From: Bert Karwatzki <spasswolf@web.de>
To: Johannes Berg <johannes@sipsolutions.net>,
	"linux-kernel@vger.kernel.org"	 <linux-kernel@vger.kernel.org>
Cc: "linux-next@vger.kernel.org" <linux-next@vger.kernel.org>,
	 "llvm@lists.linux.dev" <llvm@lists.linux.dev>,
	Thomas Gleixner <tglx@linutronix.de>,
	 linux-wireless@vger.kernel.org,
	Jason Xing <kerneljasonxing@gmail.com>,
	 spasswolf@web.de
Subject: Re: lockup and kernel panic in linux-next-202505{09,12} when compiled with clang
Date: Sat, 17 May 2025 21:49:24 +0200	[thread overview]
Message-ID: <15f3633cbd08b30475d5b76c5cc9180fbf17a12f.camel@web.de> (raw)
In-Reply-To: <388bbc4c805ce029bbd08010fd30405494f998a9.camel@web.de>

Am Samstag, dem 17.05.2025 um 13:34 +0200 schrieb Bert Karwatzki:
> Am Freitag, dem 16.05.2025 um 20:19 +0200 schrieb Bert Karwatzki:
> > I've added a debugging statement:
> > 
> > diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
> > index 3bd5ee0995fe..853493eca4f5 100644
> > --- a/net/mac80211/tx.c
> > +++ b/net/mac80211/tx.c
> > @@ -4586,7 +4586,11 @@ static noinline void ieee80211_8023_xmit_clang_debug_helper(struct sk_buff *skb,
> >                                                             struct ieee80211_local *local,
> >                                                             struct ieee80211_tx_info *info)
> >  {
> > -       if (unlikely(skb->sk && sock_flag(skb->sk, SOCK_WIFI_STATUS))) {
> > +       if (unlikely(skb->sk && ((skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS) ||
> > +                               sock_flag(skb->sk, SOCK_WIFI_STATUS)))) {
> > +               if ((skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS) ^ sock_flag(skb->sk, SOCK_WIFI_STATUS))
> > +                       printk(KERN_INFO "%s: skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS = %u sock_flag(skb->sk,
> > SOCK_WIFI_STATUS) = %u\n",
> > +                                       __func__, (skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS), sock_flag(skb->sk,
> > SOCK_WIFI_STATUS));
> >                 info->status_data = ieee80211_store_ack_skb(local, skb,
> >                                                             &info->flags, NULL);
> >                 if (info->status_data)
> > 
> > This gives the following logoutput (and a lockup), indicating that sock_flag(skb->sk, SOCK_WIFI_STATUS) and
> > (skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS) are actually NOT equivalent (when compiled with clang and
> > PREEMPT_RT=y)
> 
> I've added more debugging output:
> 
> diff --git a/include/net/sock.h b/include/net/sock.h
> index e223102337c7..e13560b5b7a8 100644
> --- a/include/net/sock.h
> +++ b/include/net/sock.h
> @@ -2735,8 +2735,10 @@ static inline void _sock_tx_timestamp(struct sock *sk,
>  				*tskey = atomic_inc_return(&sk->sk_tskey) - 1;
>  		}
>  	}
> -	if (unlikely(sock_flag(sk, SOCK_WIFI_STATUS)))
> +	if (unlikely(sock_flag(sk, SOCK_WIFI_STATUS))) {
> +		printk(KERN_INFO "%s: setting SKBTX_WIFI_STATUS for sk = %px\n", __func__, sk);
>  		*tx_flags |= SKBTX_WIFI_STATUS;
> +	}
>  }
>  
>  static inline void sock_tx_timestamp(struct sock *sk,
> diff --git a/net/core/sock.c b/net/core/sock.c
> index e02a78538e3e..f6589ad5ba36 100644
> --- a/net/core/sock.c
> +++ b/net/core/sock.c
> @@ -1548,6 +1548,7 @@ int sk_setsockopt(struct sock *sk, int level, int optname,
>  		break;
>  
>  	case SO_WIFI_STATUS:
> +		printk(KERN_INFO "%s: setting SOCK_WIFI_STATUS to %u for sk = %px\n", __func__, valbool, sk);
>  		sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
>  		break;
>  
> diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
> index 853493eca4f5..eee2f80949c6 100644
> --- a/net/mac80211/tx.c
> +++ b/net/mac80211/tx.c
> @@ -4588,9 +4588,12 @@ static noinline void ieee80211_8023_xmit_clang_debug_helper(struct sk_buff *skb,
>  {
>  	if (unlikely(skb->sk && ((skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS) ||
>  				sock_flag(skb->sk, SOCK_WIFI_STATUS)))) {
> -		if ((skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS) ^ sock_flag(skb->sk, SOCK_WIFI_STATUS))
> +		if ((skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS) ^ sock_flag(skb->sk, SOCK_WIFI_STATUS)) {
>  			printk(KERN_INFO "%s: skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS = %u sock_flag(skb->sk, SOCK_WIFI_STATUS) = %u\n",
>  					__func__, (skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS), sock_flag(skb->sk, SOCK_WIFI_STATUS));
> +			printk(KERN_INFO "%s: skb->sk = %px skb->sk->sk_flags = 0x%lx\n", __func__, skb->sk, skb->sk->sk_flags);
> +			return; // This should make this case non-fatal.
> +		}
>  		info->status_data = ieee80211_store_ack_skb(local, skb,
>  							    &info->flags, NULL);
>  		if (info->status_data)
> 
> 
> 
> This gives after ~15min uptime
> 
> [  189.337797] [    T576] ieee80211_8023_xmit_clang_debug_helper: skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS = 0 sock_flag(skb->sk, SOCK_WIFI_STATUS) = 1
> [  189.337803] [    T576] ieee80211_8023_xmit_clang_debug_helper: skb->sk = ffff8c1b798c4e00 skb->sk->sk_flags = 0xffffffffb4efe640
> [  191.325256] [    T576] ieee80211_8023_xmit_clang_debug_helper: skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS = 0 sock_flag(skb->sk, SOCK_WIFI_STATUS) = 1
> [  191.325259] [    T576] ieee80211_8023_xmit_clang_debug_helper: skb->sk = ffff8c1b798c5a00 skb->sk->sk_flags = 0xffffffffb4efe640
> [  257.591831] [    T576] ieee80211_8023_xmit_clang_debug_helper: skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS = 0 sock_flag(skb->sk, SOCK_WIFI_STATUS) = 1
> [  257.591844] [    T576] ieee80211_8023_xmit_clang_debug_helper: skb->sk = ffff8c1baf3bca00 skb->sk->sk_flags = 0xffffffffb4efe640
> [  301.786963] [    T576] ieee80211_8023_xmit_clang_debug_helper: skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS = 0 sock_flag(skb->sk, SOCK_WIFI_STATUS) = 1
> [  301.786967] [    T576] ieee80211_8023_xmit_clang_debug_helper: skb->sk = ffff8c1c1bc40100 skb->sk->sk_flags = 0xffffffffb4efe640
> [  302.780881] [    T576] ieee80211_8023_xmit_clang_debug_helper: skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS = 0 sock_flag(skb->sk, SOCK_WIFI_STATUS) = 1
> [  302.780884] [    T576] ieee80211_8023_xmit_clang_debug_helper: skb->sk = ffff8c1a44cf6000 skb->sk->sk_flags = 0xffffffffb4efe640
> [  482.792298] [    T576] ieee80211_8023_xmit_clang_debug_helper: skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS = 0 sock_flag(skb->sk, SOCK_WIFI_STATUS) = 1
> [  482.792304] [    T576] ieee80211_8023_xmit_clang_debug_helper: skb->sk = ffff8c1da0f4de00 skb->sk->sk_flags = 0xffffffffb4efe640
> [  482.806144] [    T576] ieee80211_8023_xmit_clang_debug_helper: skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS = 0 sock_flag(skb->sk, SOCK_WIFI_STATUS) = 1
> [  482.806148] [    T576] ieee80211_8023_xmit_clang_debug_helper: skb->sk = ffff8c1da0f4c500 skb->sk->sk_flags = 0xffffffffb4efe640
> [  482.817280] [    T576] ieee80211_8023_xmit_clang_debug_helper: skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS = 0 sock_flag(skb->sk, SOCK_WIFI_STATUS) = 1
> [  482.817284] [    T576] ieee80211_8023_xmit_clang_debug_helper: skb->sk = ffff8c1da0f4df00 skb->sk->sk_flags = 0xffffffffb4efe640
> [  552.327291] [    T576] ieee80211_8023_xmit_clang_debug_helper: skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS = 0 sock_flag(skb->sk, SOCK_WIFI_STATUS) = 1
> [  552.327295] [    T576] ieee80211_8023_xmit_clang_debug_helper: skb->sk = ffff8c1da0f4de00 skb->sk->sk_flags = 0xffffffffb4efe640
> [  916.971599] [    T576] ieee80211_8023_xmit_clang_debug_helper: skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS = 0 sock_flag(skb->sk, SOCK_WIFI_STATUS) = 1
> [  916.971607] [    T576] ieee80211_8023_xmit_clang_debug_helper: skb->sk = ffff8c1a62834000 skb->sk->sk_flags = 0xffffffffb4efe640
> 
> The printk()s in sk_set_sockopt() and _sock_tx_timestamp() are not called at all so the flag 
> SOCK_WIFI_STATUS is actually nevers set! What is printed when printing skb->sk->sk_flags looks
> suspiciously like a pointer, and as sk_flags is actually a member of a union in struct sock_common
> it seems clang is using sk_flags for one of the other union members here
> 
> struct sock_common {
> [...]
> 	union {
> 		unsigned long	skc_flags;
> 		struct sock	*skc_listener; /* request_sock */
> 		struct inet_timewait_death_row *skc_tw_dr; /* inet_timewait_sock */
> 	};
> [...]
> }
> 
> Bert Karwatzki

I added even more debugging output and found out why commit 76a853f86c97 (" wifi: free 
SKBTX_WIFI_STATUS skb tx_flags flag") does not work.

diff --git a/include/net/sock.h b/include/net/sock.h
index e13560b5b7a8..6e1291d2e5a1 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2738,6 +2738,8 @@ static inline void _sock_tx_timestamp(struct sock *sk,
 	if (unlikely(sock_flag(sk, SOCK_WIFI_STATUS))) {
 		printk(KERN_INFO "%s: setting SKBTX_WIFI_STATUS for sk = %px\n", __func__, sk);
 		*tx_flags |= SKBTX_WIFI_STATUS;
+	} else {
+		printk(KERN_INFO "%s: NOT setting SKBTX_WIFI_STATUS for sk = %px\n", __func__, sk);
 	}
 }
 
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 20915895bdaa..4913b09c0617 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -912,6 +912,7 @@ reqsk_alloc_noprof(const struct request_sock_ops *ops, struct sock *sk_listener,
 			return NULL;
 		}
 		req->rsk_listener = sk_listener;
+		printk(KERN_INFO "%s: sk_listener = %px\n", __func__, sk_listener);
 	}
 	req->rsk_ops = ops;
 	req_to_sk(req)->sk_prot = sk_listener->sk_prot;
@@ -986,6 +987,7 @@ static struct request_sock *inet_reqsk_clone(struct request_sock *req,
 	nreq_sk->sk_incoming_cpu = req_sk->sk_incoming_cpu;
 
 	nreq->rsk_listener = sk;
+	printk(KERN_INFO "%s: rsk_listener =%px\n", __func__, sk);
 
 	/* We need not acquire fastopenq->lock
 	 * because the child socket is locked in inet_csk_listen_stop().
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 67efe9501581..1a3108ec7503 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -190,6 +190,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
 		const struct inet_sock *inet = inet_sk(sk);
 
 		tw->tw_dr	    = dr;
+		printk(KERN_INFO "%s: sk = %px tw_dr = %px\n", __func__, sk, dr);
 		/* Give us an identity. */
 		tw->tw_daddr	    = inet->inet_daddr;
 		tw->tw_rcv_saddr    = inet->inet_rcv_saddr;
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index eee2f80949c6..227b86427e06 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -4586,6 +4586,8 @@ static noinline void ieee80211_8023_xmit_clang_debug_helper(struct sk_buff *skb,
 							    struct ieee80211_local *local,
 							    struct ieee80211_tx_info *info)
 {
+	if (skb->sk)
+		printk(KERN_INFO "%s: skb->sk = %px skb->sk->sk_flags = 0x%lx\n", __func__, skb->sk, skb->sk->sk_flags);
 	if (unlikely(skb->sk && ((skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS) ||
 				sock_flag(skb->sk, SOCK_WIFI_STATUS)))) {
 		if ((skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS) ^ sock_flag(skb->sk, SOCK_WIFI_STATUS)) {


This monitor the value of skb->sk->sk_flags not only in the error case but in all cases, and also monitors
the places where the other members of the sk_flags union are set. The error occurs when at the start
of ieee80211_8023_xmit_clang_debug_helper() sk_flags is not actually the skc_flags member of the union
but insted is skc_tw_dr which is only interpreted is flags.
 So why does it work with gcc but fail with clang? sock_flag(skb->sk, SOCK_WIFI_STATUS) test bit 19 of 
skb->sk->sk_flags 

Here are the important snippets of debug output:

clang:
[  T575] ieee80211_8023_xmit_clang_debug_helper: skb->sk = ffff8f1bebba4300 skb->sk->sk_flags = 0xffffffffa16fe640

Here test_bit(0xffffffffa16fe640, SOCK_WIFI_STATUS) is 1.

gcc:
[  T600] ieee80211_8023_xmit_clang_debug_helper: skb->sk = ffff8d3506bec700 skb->sk->sk_flags = 0xffffffff93d40100
Here test_bit(0xffffffff93d40100, SOCK_WIFI_STATUS) is 0.

So that this works with gcc just seems like luck. I've not yet test why it works with clang when PREEMPT_RT is not
enabled but my guess is that in that case we have a tw_dr pointer which fails the test_bit().

Bert Karwatzki








  reply	other threads:[~2025-05-17 19:49 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-05-13 16:48 lockup and kernel panic in linux-next-202505{09,12} when compiled with clang Bert Karwatzki
2025-05-13 22:33 ` Thomas Gleixner
2025-05-14  0:11   ` Bert Karwatzki
2025-05-14  9:32     ` Bert Karwatzki
2025-05-14 10:23       ` Johannes Berg
2025-05-14 13:46         ` Bert Karwatzki
2025-05-14 17:49           ` Johannes Berg
2025-05-14 18:56           ` Johannes Berg
2025-05-14 22:27             ` Bert Karwatzki
2025-05-15  6:30               ` Johannes Berg
2025-05-15  9:10                 ` Bert Karwatzki
2025-05-16 18:19                   ` Bert Karwatzki
2025-05-17 11:34                     ` Bert Karwatzki
2025-05-17 19:49                       ` Bert Karwatzki [this message]
2025-05-18  1:30                         ` Jason Xing
2025-05-18 12:12                           ` Bert Karwatzki
2025-05-18 12:43                             ` Bert Karwatzki
2025-05-18 14:15                               ` Bert Karwatzki
2025-05-18 14:41                                 ` Bert Karwatzki
  -- strict thread matches above, loose matches on Subject: below --
2025-05-13 22:15 Bert Karwatzki

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=15f3633cbd08b30475d5b76c5cc9180fbf17a12f.camel@web.de \
    --to=spasswolf@web.de \
    --cc=johannes@sipsolutions.net \
    --cc=kerneljasonxing@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-next@vger.kernel.org \
    --cc=linux-wireless@vger.kernel.org \
    --cc=llvm@lists.linux.dev \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox