public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Masami Hiramatsu <mhiramat@kernel.org>
To: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>,
	Steven Rostedt <rostedt@goodmis.org>,
	x86@kernel.org, linux-kernel@vger.kernel.org, bristot@redhat.com,
	jbaron@akamai.com, torvalds@linux-foundation.org,
	tglx@linutronix.de, namit@vmware.com, hpa@zytor.com,
	luto@kernel.org, ard.biesheuvel@linaro.org, jpoimboe@redhat.com,
	jeyu@kernel.org, alexei.starovoitov@gmail.com
Subject: Re: [PATCH -tip 1/2] x86/alternative: Sync bp_patching update for avoiding NULL pointer exception
Date: Wed, 11 Dec 2019 01:44:01 +0900	[thread overview]
Message-ID: <20191211014401.2f0c27f259a83d1f32aa6f2e@kernel.org> (raw)
In-Reply-To: <20191209143940.GI2810@hirez.programming.kicks-ass.net>

Hi Peter,

On Mon, 9 Dec 2019 15:39:40 +0100
Peter Zijlstra <peterz@infradead.org> wrote:

> On Wed, Nov 27, 2019 at 02:56:52PM +0900, Masami Hiramatsu wrote:
> 
> > diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
> > index 4552795a8df4..9505096e2cd1 100644
> > --- a/arch/x86/kernel/alternative.c
> > +++ b/arch/x86/kernel/alternative.c
> > @@ -1134,8 +1134,14 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries
> >  	 * sync_core() implies an smp_mb() and orders this store against
> >  	 * the writing of the new instruction.
> >  	 */
> > -	bp_patching.vec = NULL;
> >  	bp_patching.nr_entries = 0;
> > +	/*
> > +	 * This sync_core () ensures that all int3 handlers in progress
> > +	 * have finished. This allows poke_int3_handler () after this to
> > +	 * avoid touching bp_patching.vec by checking nr_entries == 0.
> > +	 */
> > +	text_poke_sync();
> > +	bp_patching.vec = NULL;
> >  }
> 
> How's something like this instead? Under the assumption that it is rare
> to actually hit the INT3 and even more rare to actually hit this race,
> the below should be a lot cheaper.

Ah, this reminds me of my atomic-refcounter method for kpatch idea
and module unloading.

This looks good, but I feel it is a bit complicated.

If we use atomic (and spin-wait) here, can we use atomic_inc_not_zero()
in the poke_int3_handler() at first for making sure the bp_patching is
under operation or not?
I think it makes things simpler, like below.

---------
atomic_t bp_refcnt;

poke_int3_handler()
{
	smp_rmb();
	if (!READ_ONCE(bp_patching.nr_entries))
		return 0;
	if (!atomic_inc_not_zero(&bp_refcnt))
		return 0;
	smp_mb__after_atomic();
	[use bp_patching]
	atomic_dec(&bp_refcnt);
}

text_poke_bp_batch()
{
	bp_patching.vec = tp;
	bp_patching.nr_entries = nr_entries;
	smp_wmb();
	atomic_inc(&bp_refcnt);
	...
	atomic_dec(&bp_refcnt);
	/* wait for all running poke_int3_handler(). */
	atomic_cond_read_acquire(&bp_refcnt, !VAL);
	bp_patching.vec = NULL;
	bp_patching.nr_entries = 0;
}
---------

Thank you,


> 
> ---
>  arch/x86/kernel/alternative.c | 69 +++++++++++++++++++++++++++++++++----------
>  1 file changed, 53 insertions(+), 16 deletions(-)
> 
> diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
> index 30e86730655c..12f2d193109d 100644
> --- a/arch/x86/kernel/alternative.c
> +++ b/arch/x86/kernel/alternative.c
> @@ -953,6 +953,8 @@ static struct bp_patching_desc {
>  	int nr_entries;
>  } bp_patching;
>  
> +static atomic_t bp_handlers;
> +
>  static inline void *text_poke_addr(struct text_poke_loc *tp)
>  {
>  	return _stext + tp->rel_addr;
> @@ -973,8 +975,8 @@ NOKPROBE_SYMBOL(patch_cmp);
>  int notrace poke_int3_handler(struct pt_regs *regs)
>  {
>  	struct text_poke_loc *tp;
> +	int nr, len, ret = 0;
>  	void *ip;
> -	int len;
>  
>  	/*
>  	 * Having observed our INT3 instruction, we now must observe
> @@ -987,12 +989,21 @@ int notrace poke_int3_handler(struct pt_regs *regs)
>  	 * Idem for other elements in bp_patching.
>  	 */
>  	smp_rmb();
> -
> -	if (likely(!bp_patching.nr_entries))
> +	if (!READ_ONCE(bp_patching.nr_entries))
>  		return 0;
>  
> +	atomic_inc(&bp_handlers);
> +	/*
> +	 * 'ACQUIRE', everything happens after the increment.
> +	 */
> +	smp_mb__after_atomic();
> +
> +	nr = smp_load_acquire(&bp_patching.nr_entries);
> +	if (likely(!nr))
> +		goto out;
> +
>  	if (user_mode(regs))
> -		return 0;
> +		goto out;
>  
>  	/*
>  	 * Discount the INT3. See text_poke_bp_batch().
> @@ -1002,16 +1013,16 @@ int notrace poke_int3_handler(struct pt_regs *regs)
>  	/*
>  	 * Skip the binary search if there is a single member in the vector.
>  	 */
> -	if (unlikely(bp_patching.nr_entries > 1)) {
> -		tp = bsearch(ip, bp_patching.vec, bp_patching.nr_entries,
> +	if (unlikely(nr > 1)) {
> +		tp = bsearch(ip, bp_patching.vec, nr,
>  			     sizeof(struct text_poke_loc),
>  			     patch_cmp);
>  		if (!tp)
> -			return 0;
> +			goto out;
>  	} else {
>  		tp = bp_patching.vec;
>  		if (text_poke_addr(tp) != ip)
> -			return 0;
> +			goto out;
>  	}
>  
>  	len = text_opcode_size(tp->opcode);
> @@ -1023,7 +1034,7 @@ int notrace poke_int3_handler(struct pt_regs *regs)
>  		 * Someone poked an explicit INT3, they'll want to handle it,
>  		 * do not consume.
>  		 */
> -		return 0;
> +		goto out;
>  
>  	case CALL_INSN_OPCODE:
>  		int3_emulate_call(regs, (long)ip + tp->rel32);
> @@ -1038,7 +1049,14 @@ int notrace poke_int3_handler(struct pt_regs *regs)
>  		BUG();
>  	}
>  
> -	return 1;
> +	ret = 1;
> +out:
> +	/*
> > +	 * 'RELEASE', everything happens before the decrement.
> +	 */
> +	smp_mb__before_atomic();
> +	atomic_dec(&bp_handlers);
> +	return ret;
>  }
>  NOKPROBE_SYMBOL(poke_int3_handler);
>  
> @@ -1076,7 +1094,12 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries
>  	lockdep_assert_held(&text_mutex);
>  
>  	bp_patching.vec = tp;
> -	bp_patching.nr_entries = nr_entries;
> +	/*
> +	 * bp_patching.vec = tp			nr = bp_patching.nr_entries
> +	 * REL					ACQ
> +	 * bp_patching.nr_entries = nr_entries	tp = bp_patching.vec[]
> +	 */
> +	smp_store_release(&bp_patching.nr_entries, nr_entries);
>  
>  	/*
>  	 * Corresponding read barrier in int3 notifier for making sure the
> @@ -1134,13 +1157,27 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries
>  	 * sync_core() implies an smp_mb() and orders this store against
>  	 * the writing of the new instruction.
>  	 */
> -	bp_patching.nr_entries = 0;
> +	WRITE_ONCE(bp_patching.nr_entries, 0);
>  	/*
> -	 * This sync_core () call ensures that all INT3 handlers in progress
> -	 * have finished. This allows poke_int3_handler() after this to
> > -	 * avoid touching bp_patching.vec by checking nr_entries == 0.
> +	 * nr_entries = 0	bp_handlers++
> +	 * MB			MB
> +	 * VAL = bp_handlers	nr = nr_entries
> +	 */
> +	smp_mb();
> +	/*
> +	 * Guarantee all poke_int3_handler()s that have observed
> > +	 * @bp_patching.nr_entries have completed before we clear
> +	 * bp_patching.vec.
> +	 *
> +	 * We can't do this before text_poke_sync() because then there
> +	 * might still be observable INT3 instructions.
> +	 */
> +	atomic_cond_read_acquire(&bp_handlers, !VAL);
> +	/*
> +	 * bp_handlers == 0		tp = bp_patching.vec[]
> +	 * ACQ				MB
> +	 * bp_patching.vec = NULL	bp_handlers--;
>  	 */
> -	text_poke_sync();
>  	bp_patching.vec = NULL;
>  }
>  


-- 
Masami Hiramatsu <mhiramat@kernel.org>

  reply	other threads:[~2019-12-10 16:44 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-11-27  5:56 [PATCH -tip 0/2] x86/kprobes: Fix 2 issues related to text_poke_bp and optprobe Masami Hiramatsu
2019-11-27  5:56 ` [PATCH -tip 1/2] x86/alternative: Sync bp_patching update for avoiding NULL pointer exception Masami Hiramatsu
2019-12-02  9:15   ` Peter Zijlstra
2019-12-02 11:50     ` Masami Hiramatsu
2019-12-02 13:43       ` Peter Zijlstra
2019-12-02 14:39         ` Masami Hiramatsu
2019-12-04  8:33   ` [tip: core/kprobes] x86/alternatives: " tip-bot2 for Masami Hiramatsu
2019-12-09 14:39   ` [PATCH -tip 1/2] x86/alternative: " Peter Zijlstra
2019-12-10 16:44     ` Masami Hiramatsu [this message]
2019-12-10 17:32       ` Peter Zijlstra
2019-12-11  0:09         ` Peter Zijlstra
2019-12-11  8:09           ` Masami Hiramatsu
2019-12-11  9:12             ` Daniel Bristot de Oliveira
2019-11-27  5:57 ` [PATCH -tip 2/2] kprobes: Set unoptimized flag after unoptimizing code Masami Hiramatsu
2019-11-27  6:19   ` Alexei Starovoitov
2019-11-27  6:49     ` Ingo Molnar
2019-12-02 21:55       ` Alexei Starovoitov
2019-11-27  6:56     ` Masami Hiramatsu
2019-12-04  8:33   ` [tip: core/kprobes] " tip-bot2 for Masami Hiramatsu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191211014401.2f0c27f259a83d1f32aa6f2e@kernel.org \
    --to=mhiramat@kernel.org \
    --cc=alexei.starovoitov@gmail.com \
    --cc=ard.biesheuvel@linaro.org \
    --cc=bristot@redhat.com \
    --cc=hpa@zytor.com \
    --cc=jbaron@akamai.com \
    --cc=jeyu@kernel.org \
    --cc=jpoimboe@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@kernel.org \
    --cc=mingo@kernel.org \
    --cc=namit@vmware.com \
    --cc=peterz@infradead.org \
    --cc=rostedt@goodmis.org \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox