public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Pan Xinhui <xinhui@linux.vnet.ibm.com>
To: Waiman Long <Waiman.Long@hpe.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@redhat.com>
Cc: linux-kernel@vger.kernel.org,
	Scott J Norton <scott.norton@hpe.com>,
	Douglas Hatch <doug.hatch@hpe.com>
Subject: Re: [PATCH] locking/pvqspinlock: Add lock holder CPU argument to pv_wait()
Date: Thu, 14 Apr 2016 17:36:07 +0800	[thread overview]
Message-ID: <570F6487.8070706@linux.vnet.ibm.com> (raw)
In-Reply-To: <1460581160-46187-1-git-send-email-Waiman.Long@hpe.com>



On 2016年04月14日 04:59, Waiman Long wrote:
> Pan Xinhui was asking for a lock holder cpu argument in pv_wait()
> to help the porting of pvqspinlock to PPC. The new argument can
> potentially help the hypervisor expedite the execution of the critical
> section so that the lock holder vCPU can release the lock sooner.
> 
> This patch does just that by storing the previous node vCPU number.
> In pv_wait_head_or_lock(), pv_wait() will be called with that vCPU
> number as it is likely to be the lock holder. In pv_wait_node(),
> -1 will be passed to pv_wait() instead to indicate that it doesn't
> know what the current lock holder is.
> 
> This patch introduces negligible overhead to the current pvqspinlock
> code. The extra lockcpu argument isn't currently used in x86
> architecture.
> 
> Signed-off-by: Waiman Long <Waiman.Long@hpe.com>
> ---
>  arch/x86/include/asm/paravirt.h       |    4 ++--
>  arch/x86/include/asm/paravirt_types.h |    2 +-
>  arch/x86/kernel/kvm.c                 |    2 +-
>  arch/x86/xen/spinlock.c               |    2 +-
>  kernel/locking/qspinlock_paravirt.h   |   19 +++++++++++++++----
>  kernel/locking/qspinlock_stat.h       |    8 ++++----
>  6 files changed, 24 insertions(+), 13 deletions(-)
> 
> diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
> index 601f1b8..b89eccf 100644
> --- a/arch/x86/include/asm/paravirt.h
> +++ b/arch/x86/include/asm/paravirt.h
> @@ -676,9 +676,9 @@ static __always_inline void pv_queued_spin_unlock(struct qspinlock *lock)
>  	PVOP_VCALLEE1(pv_lock_ops.queued_spin_unlock, lock);
>  }
> 
> -static __always_inline void pv_wait(u8 *ptr, u8 val)
> +static __always_inline void pv_wait(u8 *ptr, u8 val, int lockcpu)
>  {
> -	PVOP_VCALL2(pv_lock_ops.wait, ptr, val);
> +	PVOP_VCALL3(pv_lock_ops.wait, ptr, val, lockcpu);
>  }
> 
>  static __always_inline void pv_kick(int cpu)
> diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
> index e8c2326..2fc26c1 100644
> --- a/arch/x86/include/asm/paravirt_types.h
> +++ b/arch/x86/include/asm/paravirt_types.h
> @@ -312,7 +312,7 @@ struct pv_lock_ops {
>  	void (*queued_spin_lock_slowpath)(struct qspinlock *lock, u32 val);
>  	struct paravirt_callee_save queued_spin_unlock;
> 
> -	void (*wait)(u8 *ptr, u8 val);
> +	void (*wait)(u8 *ptr, u8 val, int lockcpu);
>  	void (*kick)(int cpu);
>  #else /* !CONFIG_QUEUED_SPINLOCKS */
>  	struct paravirt_callee_save lock_spinning;
> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
> index dc1207e..47ab4e1 100644
> --- a/arch/x86/kernel/kvm.c
> +++ b/arch/x86/kernel/kvm.c
> @@ -590,7 +590,7 @@ static void kvm_kick_cpu(int cpu)
> 
>  #include <asm/qspinlock.h>
> 
> -static void kvm_wait(u8 *ptr, u8 val)
> +static void kvm_wait(u8 *ptr, u8 val, int lockcpu)
>  {
>  	unsigned long flags;
> 
> diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
> index 9e2ba5c..6f78c41 100644
> --- a/arch/x86/xen/spinlock.c
> +++ b/arch/x86/xen/spinlock.c
> @@ -33,7 +33,7 @@ static void xen_qlock_kick(int cpu)
>  /*
>   * Halt the current CPU & release it back to the host
>   */
> -static void xen_qlock_wait(u8 *byte, u8 val)
> +static void xen_qlock_wait(u8 *byte, u8 val, int lockcpu)
>  {
>  	int irq = __this_cpu_read(lock_kicker_irq);
> 
> diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
> index 21ede57..4bec98b 100644
> --- a/kernel/locking/qspinlock_paravirt.h
> +++ b/kernel/locking/qspinlock_paravirt.h
> @@ -51,6 +51,7 @@ struct pv_node {
>  	struct mcs_spinlock	__res[3];
> 
>  	int			cpu;
> +	int			prev_cpu;	/* Previous node cpu */
>  	u8			state;
>  };
> 
> @@ -156,8 +157,7 @@ static __always_inline int trylock_clear_pending(struct qspinlock *lock)
>   * 256 (64-bit) or 512 (32-bit) to fully utilize a 4k page.
>   *
>   * Since we should not be holding locks from NMI context (very rare indeed) the
> - * max load factor is 0.75, which is around the point where open addressing
> - * breaks down.
> + * max load factor is 0.75.
>   *
>   */
>  struct pv_hash_entry {
> @@ -275,6 +275,7 @@ static void pv_init_node(struct mcs_spinlock *node)
> 
>  	pn->cpu = smp_processor_id();
>  	pn->state = vcpu_running;
> +	pn->prev_cpu = -1;
>  }
> 
>  /*
> @@ -290,6 +291,8 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
>  	int loop;
>  	bool wait_early;
> 
> +	pn->prev_cpu = pp->cpu;	/* Save previous node vCPU */
> +
>  	/* waitcnt processing will be compiled out if !QUEUED_LOCK_STAT */
>  	for (;; waitcnt++) {
>  		for (wait_early = false, loop = SPIN_THRESHOLD; loop; loop--) {
> @@ -317,7 +320,7 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
>  			qstat_inc(qstat_pv_wait_node, true);
>  			qstat_inc(qstat_pv_wait_again, waitcnt);
>  			qstat_inc(qstat_pv_wait_early, wait_early);
> -			pv_wait(&pn->state, vcpu_halted);
> +			pv_wait(&pn->state, vcpu_halted, -1);
If the contention is high, we might reach this point, and in such a scenario we do
indeed need the lock holder. How about this: we store the lock in pv_node, then
search for the lock in the hashtable. The code might look like:
	node = pv_hash_lookup(pn->lock);
	pv_wait(...,node->holder);
thanks
xinhui
>  		}
> 
>  		/*
> @@ -453,7 +456,15 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
>  		WRITE_ONCE(pn->state, vcpu_halted);
>  		qstat_inc(qstat_pv_wait_head, true);
>  		qstat_inc(qstat_pv_wait_again, waitcnt);
> -		pv_wait(&l->locked, _Q_SLOW_VAL);
> +
> +		/*
> +		 * Pass in the previous node vCPU number which is likely to be
> +		 * the lock holder vCPU. This additional information may help
> +		 * the hypervisor to give more resource to that vCPU so that
> +		 * it can release the lock faster. With lock stealing,
> +		 * however, that vCPU may not be the actual lock holder.
> +		 */
> +		pv_wait(&l->locked, _Q_SLOW_VAL, pn->prev_cpu);
> 
>  		/*
>  		 * The unlocker should have freed the lock before kicking the
> diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h
> index eb2a2c9..8728348 100644
> --- a/kernel/locking/qspinlock_stat.h
> +++ b/kernel/locking/qspinlock_stat.h
> @@ -266,12 +266,12 @@ static inline void __pv_kick(int cpu)
>  /*
>   * Replacement function for pv_wait()
>   */
> -static inline void __pv_wait(u8 *ptr, u8 val)
> +static inline void __pv_wait(u8 *ptr, u8 val, int lockcpu)
>  {
>  	u64 *pkick_time = this_cpu_ptr(&pv_kick_time);
> 
>  	*pkick_time = 0;
> -	pv_wait(ptr, val);
> +	pv_wait(ptr, val, lockcpu);
>  	if (*pkick_time) {
>  		this_cpu_add(qstats[qstat_pv_latency_wake],
>  			     sched_clock() - *pkick_time);
> @@ -279,8 +279,8 @@ static inline void __pv_wait(u8 *ptr, u8 val)
>  	}
>  }
> 
> -#define pv_kick(c)	__pv_kick(c)
> -#define pv_wait(p, v)	__pv_wait(p, v)
> +#define pv_kick(c)		__pv_kick(c)
> +#define pv_wait(p, v, c)	__pv_wait(p, v, c)
> 
>  #else /* CONFIG_QUEUED_LOCK_STAT */
> 

  parent reply	other threads:[~2016-04-14  9:37 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-04-13 20:59 [PATCH] locking/pvqspinlock: Add lock holder CPU argument to pv_wait() Waiman Long
2016-04-14  0:21 ` Peter Zijlstra
2016-04-14  1:59   ` Waiman Long
2016-04-14  8:39   ` Pan Xinhui
2016-04-14  9:36 ` Pan Xinhui [this message]
2016-04-14 14:34   ` Pan Xinhui
2016-04-14 18:47     ` Waiman Long

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=570F6487.8070706@linux.vnet.ibm.com \
    --to=xinhui@linux.vnet.ibm.com \
    --cc=Waiman.Long@hpe.com \
    --cc=doug.hatch@hpe.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=scott.norton@hpe.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox