All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: linux-kernel@vger.kernel.org, tglx@linutronix.de,
	boqun.feng@gmail.com, bristot@redhat.com, bsegall@google.com,
	dietmar.eggemann@arm.com, jstultz@google.com,
	juri.lelli@redhat.com, longman@redhat.com, mgorman@suse.de,
	mingo@redhat.com, rostedt@goodmis.org, swood@redhat.com,
	vincent.guittot@linaro.org, vschneid@redhat.com, will@kernel.org
Subject: Re: [PATCH v3 7/7] locking/rtmutex: Acquire the hb lock via trylock after wait-proxylock.
Date: Tue, 12 Sep 2023 13:25:51 +0200	[thread overview]
Message-ID: <20230912112551.GH35261@noisy.programming.kicks-ass.net> (raw)
In-Reply-To: <20230912111711.DHVKG-B4@linutronix.de>

On Tue, Sep 12, 2023 at 01:17:11PM +0200, Sebastian Andrzej Siewior wrote:

> Comments follow…
> 
> diff --git a/kernel/futex/pi.c b/kernel/futex/pi.c
> index f8e65b27d9d6b..d8866278e92ff 100644
> --- a/kernel/futex/pi.c
> +++ b/kernel/futex/pi.c
> @@ -611,29 +611,16 @@ int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
>  /*
>   * Caller must hold a reference on @pi_state.
>   */
> -static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_state)
> +static int wake_futex_pi(u32 __user *uaddr, u32 uval,
> +			 struct futex_pi_state *pi_state,
> +			 struct rt_mutex_waiter *top_waiter)
>  {
> -	struct rt_mutex_waiter *top_waiter;
>  	struct task_struct *new_owner;
>  	bool postunlock = false;
>  	DEFINE_RT_WAKE_Q(wqh);
>  	u32 curval, newval;
>  	int ret = 0;
>  
> -	top_waiter = rt_mutex_top_waiter(&pi_state->pi_mutex);
> -	if (WARN_ON_ONCE(!top_waiter)) {
> -		/*
> -		 * As per the comment in futex_unlock_pi() this should not happen.
> -		 *
> -		 * When this happens, give up our locks and try again, giving
> -		 * the futex_lock_pi() instance time to complete, either by
> -		 * waiting on the rtmutex or removing itself from the futex
> -		 * queue.
> -		 */
> -		ret = -EAGAIN;
> -		goto out_unlock;
> -	}
> -
>  	new_owner = top_waiter->task;
>  
>  	/*
> @@ -1046,15 +1033,18 @@ int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int tryl
>  	ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter);
>  
>  cleanup:
> -	spin_lock(q.lock_ptr);
>  	/*
>  	 * If we failed to acquire the lock (deadlock/signal/timeout), we must
>  	 * first acquire the hb->lock before removing the lock from the
>  	 * rt_mutex waitqueue, such that we can keep the hb and rt_mutex wait
>  	 * lists consistent.
>  	 *
> -	 * In particular; it is important that futex_unlock_pi() can not
> -	 * observe this inconsistency.
> +	 * Cannot hold hb->lock because rt_mutex already has a waiter enqueued
> +	 * and hb->lock can itself try and enqueue an rt_waiter through rtlock.
> +	 *
> +	 * Doing the cleanup without holding hb->lock can cause inconsistent
> +	 * state between hb and pi_state, but only in the direction of seeing a
> +	 * waiter that is leaving.
>  	 */
>  	if (ret && !rt_mutex_cleanup_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter))
>  		ret = 0;
> @@ -1063,6 +1053,12 @@ int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int tryl
>  	 * Waiter is unqueued.
>  	 */
>  	rt_mutex_post_schedule();
> +
> +	/*
> +	 * Now that the rt_waiter has been dequeued, it is safe to use
> +	 * spinlock/rtlock, which will enqueue a new rt_waiter.
> +	 */
> +	spin_lock(q.lock_ptr);
>  no_block:
>  	/*
>  	 * Fixup the pi_state owner and possibly acquire the lock if we
> @@ -1143,6 +1139,7 @@ int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
>  	top_waiter = futex_top_waiter(hb, &key);
>  	if (top_waiter) {
>  		struct futex_pi_state *pi_state = top_waiter->pi_state;
> +		struct rt_mutex_waiter *rt_waiter;
>  
>  		ret = -EINVAL;
>  		if (!pi_state)
> @@ -1158,19 +1155,34 @@ int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
>  		get_pi_state(pi_state);
>  		/*
>  		 * By taking wait_lock while still holding hb->lock, we ensure
> -		 * there is no point where we hold neither; and therefore
> -		 * wake_futex_p() must observe a state consistent with what we
> -		 * observed.
> +		 * there is no point where we hold neither; and thereby
> +		 * wake_futex_pi() must observe any new waiters.
> +		 *
> +		 * Since the cleanup: case in futex_lock_pi() removes the
> +		 * rt_waiter without holding hb->lock, it is possible for
> +		 * wake_futex_pi() to not find a waiter while the above does,
> +		 * in this case the waiter is on the way out and it can be
> +		 * ignored.
>  		 *
>  		 * In particular; this forces __rt_mutex_start_proxy() to
>  		 * complete such that we're guaranteed to observe the
> -		 * rt_waiter. Also see the WARN in wake_futex_pi().
> +		 * rt_waiter.
>  		 */
>  		raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
> +
> +		/*
> +		 * Futex vs rt_mutex waiter state -- if there are on rt_mutex
> +		 * waiters even though futex thinkgs there are, then the waiter
> +		 * is leaving and the uncontended path is safe to take.
> +		 */
> +		rt_waiter = rt_mutex_top_waiter(&pi_state->pi_mutex);
> +		if (!rt_waiter)
> +			goto do_uncontended;

That wants to be:

		if (!rt_waiter) {
			raw_spin_unlock_irQ(&pi_state->pi_mutex.wait_lock);
			goto do_uncontended;
		}

> +
>  		spin_unlock(&hb->lock);
>  
>  		/* drops pi_state->pi_mutex.wait_lock */
> -		ret = wake_futex_pi(uaddr, uval, pi_state);
> +		ret = wake_futex_pi(uaddr, uval, pi_state, rt_waiter);
>  
>  		put_pi_state(pi_state);
>  
> @@ -1198,6 +1210,7 @@ int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
>  		return ret;
>  	}
>  
> +do_uncontended:
>  	/*
>  	 * We have no kernel internal state, i.e. no waiters in the
>  	 * kernel. Waiters which are about to queue themselves are stuck
> 

And I think there was a requeue site that wants updating too.. the above
was more or less a starting point for discussion :-)

  parent reply	other threads:[~2023-09-12 11:26 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-09-08 16:22 [PATCH v3 0/7] locking/rtmutex: Avoid PI state recursion through sched_submit_work() Sebastian Andrzej Siewior
2023-09-08 16:22 ` [PATCH v3 1/7] sched: Constrain locks in sched_submit_work() Sebastian Andrzej Siewior
2023-09-20  7:36   ` [tip: locking/core] " tip-bot2 for Peter Zijlstra
2023-09-08 16:22 ` [PATCH v3 2/7] locking/rtmutex: Avoid unconditional slowpath for DEBUG_RT_MUTEXES Sebastian Andrzej Siewior
2023-09-20  7:36   ` [tip: locking/core] " tip-bot2 for Sebastian Andrzej Siewior
2023-09-08 16:22 ` [PATCH v3 3/7] sched: Extract __schedule_loop() Sebastian Andrzej Siewior
2023-09-20  7:36   ` [tip: locking/core] " tip-bot2 for Thomas Gleixner
2023-09-08 16:22 ` [PATCH v3 4/7] sched: Provide rt_mutex specific scheduler helpers Sebastian Andrzej Siewior
2023-09-20  7:36   ` [tip: locking/core] " tip-bot2 for Peter Zijlstra
2023-09-08 16:22 ` [PATCH v3 5/7] locking/rtmutex: Use " Sebastian Andrzej Siewior
2023-09-20  7:36   ` [tip: locking/core] " tip-bot2 for Sebastian Andrzej Siewior
2023-09-08 16:22 ` [PATCH v3 6/7] locking/rtmutex: Add a lockdep assert to catch potential nested blocking Sebastian Andrzej Siewior
2023-09-20  7:36   ` [tip: locking/core] " tip-bot2 for Thomas Gleixner
2023-09-08 16:22 ` [PATCH v3 7/7] locking/rtmutex: Acquire the hb lock via trylock after wait-proxylock Sebastian Andrzej Siewior
2023-09-09 18:29   ` Peter Zijlstra
2023-09-11 14:11   ` Peter Zijlstra
2023-09-12 10:57     ` Peter Zijlstra
2023-09-12 11:26       ` Sebastian Andrzej Siewior
2023-09-12 11:17     ` Sebastian Andrzej Siewior
2023-09-12 11:24       ` Peter Zijlstra
2023-09-12 11:25       ` Sebastian Andrzej Siewior
2023-09-12 11:25       ` Peter Zijlstra [this message]
2023-09-12 11:28         ` Sebastian Andrzej Siewior
2023-09-15 12:58     ` Thomas Gleixner
2023-09-15 13:15       ` Sebastian Andrzej Siewior
2023-09-15 15:19       ` Peter Zijlstra
2023-09-15 18:59         ` Thomas Gleixner
2023-09-19 11:03         ` Sebastian Andrzej Siewior
2023-09-20  7:36         ` [tip: locking/core] futex/pi: Fix recursive rt_mutex waiter state tip-bot2 for Peter Zijlstra
2024-01-15 11:40         ` [PATCH v3 7/7] locking/rtmutex: Acquire the hb lock via trylock after wait-proxylock Jiri Slaby
2024-01-15 11:52           ` Jiri Slaby
2024-01-15 12:16             ` Sebastian Andrzej Siewior
2024-01-15 12:54             ` Jiri Slaby
2024-01-15 15:32               ` Yann Ylavic
2024-01-15 18:33               ` Sebastian Andrzej Siewior
2024-01-16  7:07                 ` Jiri Slaby

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230912112551.GH35261@noisy.programming.kicks-ass.net \
    --to=peterz@infradead.org \
    --cc=bigeasy@linutronix.de \
    --cc=boqun.feng@gmail.com \
    --cc=bristot@redhat.com \
    --cc=bsegall@google.com \
    --cc=dietmar.eggemann@arm.com \
    --cc=jstultz@google.com \
    --cc=juri.lelli@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=longman@redhat.com \
    --cc=mgorman@suse.de \
    --cc=mingo@redhat.com \
    --cc=rostedt@goodmis.org \
    --cc=swood@redhat.com \
    --cc=tglx@linutronix.de \
    --cc=vincent.guittot@linaro.org \
    --cc=vschneid@redhat.com \
    --cc=will@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.