linux-rt-devel.lists.linux.dev archive mirror
 help / color / mirror / Atom feed
* [PATCH] nptl: Use a PI-aware lock for internal pthread_cond-related locking
@ 2025-09-08 16:33 Sebastian Andrzej Siewior
  2025-09-08 17:46 ` Adhemerval Zanella Netto
  0 siblings, 1 reply; 10+ messages in thread
From: Sebastian Andrzej Siewior @ 2025-09-08 16:33 UTC (permalink / raw)
  To: libc-alpha; +Cc: John Ogness, linux-rt-devel, Thomas Gleixner

The pthread_cond_t structure provides an internal lock using the lower two bits
of __g1_orig_size. This lock is always used by the signaller. The waiter uses
it only in certain corner cases such as when canceling a wait due to a timeout.

Under contention, a thread attempting to acquire the lock invokes the futex
operation FUTEX_WAIT while waiting for the lock to become available. This
behavior can be problematic in real-time environments because the priority of
the waiting task is not considered. As a result, a lower-priority task may
become runnable while the lock owner remains preempted.

To address this, repurpose the unused member __unused_initialized_1 as a
priority-inheritance (PI) futex named __pi_lock. This is the futex word, not a
pthread_mutex_t.

__condvar_acquire_lock() performs an atomic transition from 0 to TID. If it
fails, it falls back to __futex_lock_pi64(), which acquires the lock via
FUTEX_LOCK_PI.
__condvar_release_lock() performs an atomic transition from TID to 0. If it
fails, it falls back to futex_unlock_pi(), which releases the lock and
wakes a potential waiter.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---

This is a follow-up to
	https://lore.kernel.org/all/20250829145659.KhM4kIoQ@linutronix.de/

This change expects that PI-FUTEX is supported by the kernel. This is
the case since a long time but it is possible to disable the FUTEX
subsystem or the FUTEX_PI part of it while building the kernel.
Is it okay to assume that PI-FUTEX is available or should there be a
check somewhere during startup and in case of -ENOSYS a fallback to
current implementation?

 nptl/pthread_cond_common.c              | 58 ++++++++++---------------
 sysdeps/nptl/bits/thread-shared-types.h |  2 +-
 2 files changed, 24 insertions(+), 36 deletions(-)

diff --git a/nptl/pthread_cond_common.c b/nptl/pthread_cond_common.c
index 2708d262958f2..74010787b18de 100644
--- a/nptl/pthread_cond_common.c
+++ b/nptl/pthread_cond_common.c
@@ -106,41 +106,23 @@ __condvar_fetch_xor_wseq_release (pthread_cond_t *cond, unsigned int val)
 #endif /* !__HAVE_64B_ATOMICS */
 
 /* The lock that signalers use.  See pthread_cond_wait_common for uses.
-   The lock is our normal three-state lock: not acquired (0) / acquired (1) /
-   acquired-with-futex_wake-request (2).  However, we need to preserve the
-   other bits in the unsigned int used for the lock, and therefore it is a
-   little more complex.  */
+   The lock is the futex PI lock (PTHREAD_MUTEX_PI_NORMAL_NP) using just the
+   futex word. */
 static void __attribute__ ((unused))
 __condvar_acquire_lock (pthread_cond_t *cond, int private)
 {
-  unsigned int s = atomic_load_relaxed (&cond->__data.__g1_orig_size);
-  while ((s & 3) == 0)
+  int oldval, newval;
+
+  newval = THREAD_GETMEM (THREAD_SELF, tid);
+  oldval = atomic_compare_and_exchange_val_acq (&cond->__data.__pi_lock,
+                                                newval, 0);
+  if (oldval != 0)
     {
-      if (atomic_compare_exchange_weak_acquire (&cond->__data.__g1_orig_size,
-	  &s, s | 1))
-	return;
-      /* TODO Spinning and back-off.  */
-    }
-  /* We can't change from not acquired to acquired, so try to change to
-     acquired-with-futex-wake-request and do a futex wait if we cannot change
-     from not acquired.  */
-  while (1)
-    {
-      while ((s & 3) != 2)
-	{
-	  if (atomic_compare_exchange_weak_acquire
-	      (&cond->__data.__g1_orig_size, &s, (s & ~(unsigned int) 3) | 2))
-	    {
-	      if ((s & 3) == 0)
-		return;
-	      break;
-	    }
-	  /* TODO Back off.  */
-	}
-      futex_wait_simple (&cond->__data.__g1_orig_size,
-	  (s & ~(unsigned int) 3) | 2, private);
-      /* Reload so we see a recent value.  */
-      s = atomic_load_relaxed (&cond->__data.__g1_orig_size);
+      int e;
+
+      e = __futex_lock_pi64 (&cond->__data.__pi_lock, 0, NULL, private);
+      if (e != 0)
+        futex_fatal_error ();
     }
 }
 
@@ -148,10 +130,16 @@ __condvar_acquire_lock (pthread_cond_t *cond, int private)
 static void __attribute__ ((unused))
 __condvar_release_lock (pthread_cond_t *cond, int private)
 {
-  if ((atomic_fetch_and_release (&cond->__data.__g1_orig_size,
-				 ~(unsigned int) 3) & 3)
-      == 2)
-    futex_wake (&cond->__data.__g1_orig_size, 1, private);
+  pid_t id = THREAD_GETMEM (THREAD_SELF, tid);
+
+  if (!atomic_compare_exchange_weak_release (&cond->__data.__pi_lock, &id, 0))
+    {
+      int e;
+
+      e = futex_unlock_pi ((unsigned int *) &cond->__data.__pi_lock, private);
+      if (e != 0)
+	futex_fatal_error ();
+    }
 }
 
 /* Only use this when having acquired the lock.  */
diff --git a/sysdeps/nptl/bits/thread-shared-types.h b/sysdeps/nptl/bits/thread-shared-types.h
index e614c7f3c900e..9b362d5282cd9 100644
--- a/sysdeps/nptl/bits/thread-shared-types.h
+++ b/sysdeps/nptl/bits/thread-shared-types.h
@@ -99,7 +99,7 @@ struct __pthread_cond_s
   unsigned int __g1_orig_size;
   unsigned int __wrefs;
   unsigned int __g_signals[2];
-  unsigned int __unused_initialized_1;
+  int __pi_lock;
   unsigned int __unused_initialized_2;
 };
 
-- 
2.51.0


^ permalink raw reply related	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2025-09-10  5:57 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-09-08 16:33 [PATCH] nptl: Use a PI-aware lock for internal pthread_cond-related locking Sebastian Andrzej Siewior
2025-09-08 17:46 ` Adhemerval Zanella Netto
2025-09-09  7:32   ` Sebastian Andrzej Siewior
2025-09-09 12:22     ` Florian Weimer
2025-09-09 15:37       ` Sebastian Andrzej Siewior
2025-09-09 16:32         ` Florian Weimer
2025-09-09 19:14           ` Adhemerval Zanella Netto
2025-09-10  5:57             ` Florian Weimer
2025-09-09 13:09     ` Adhemerval Zanella Netto
2025-09-09 15:52       ` Sebastian Andrzej Siewior

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).