linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] ftrace: protect executing nmi
@ 2009-03-16 12:54 Lai Jiangshan
  2009-03-16 17:42 ` Steven Rostedt
  0 siblings, 1 reply; 10+ messages in thread
From: Lai Jiangshan @ 2009-03-16 12:54 UTC (permalink / raw)
  To: Ingo Molnar, Steven Rostedt, LKML


When I reviewed the sensitive code ftrace_nmi_enter(), I found that
the atomic variable nmi_running does protect NMI VS do_ftrace_mod_code(),
but it cannot protect NMI(entered nmi) VS NMI(ftrace_nmi_enter()).

cpu#1                   | cpu#2                 | cpu#3
ftrace_nmi_enter()      | do_ftrace_mod_code()  |
  not modify            |                       |
------------------------|-----------------------|--
executing               | set mod_code_write = 1|
executing             --|-----------------------|--------------------
executing               |                       | ftrace_nmi_enter()
executing               |                       |    do modify
------------------------|-----------------------|-----------------
ftrace_nmi_exit()       |                       |

cpu#3 may be modifying the code which is still being executed on cpu#1;
this will have undefined results and possibly take a GPF. This patch
prevents that from occurring.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
---
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 1d0d7f4..e016f5e 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -87,7 +87,8 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
  *
  * If an NMI is executed, the first thing it does is to call
  * "ftrace_nmi_enter". This will check if the flag is set to write
- * and if it is, it will write what is in the IP and "code" buffers.
+ * and if it is, and there is no executing nmi, it will write
+ * what is in the IP and "code" buffers.
  *
  * The trick is, it does not matter if everyone is writing the same
  * content to the code location. Also, if a CPU is executing code
@@ -96,6 +97,7 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
  */
 
 static atomic_t nmi_running = ATOMIC_INIT(0);
+static atomic_t nmi_executing = ATOMIC_INIT(0);
 static int mod_code_status;		/* holds return value of text write */
 static int mod_code_write;		/* set when NMI should do the write */
 static void *mod_code_ip;		/* holds the IP to write to */
@@ -135,14 +137,18 @@ void ftrace_nmi_enter(void)
 	atomic_inc(&nmi_running);
 	/* Must have nmi_running seen before reading write flag */
 	smp_mb();
-	if (mod_code_write) {
+	if (!atomic_read(&nmi_executing) && mod_code_write) {
 		ftrace_mod_code();
 		atomic_inc(&nmi_update_count);
 	}
+	atomic_inc(&nmi_executing);
+	smp_mb();
 }
 
 void ftrace_nmi_exit(void)
 {
+	smp_mb();
+	atomic_dec(&nmi_executing);
 	/* Finish all executions before clearing nmi_running */
 	smp_wmb();
 	atomic_dec(&nmi_running);





^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH] ftrace: protect executing nmi
  2009-03-16 12:54 [PATCH] ftrace: protect executing nmi Lai Jiangshan
@ 2009-03-16 17:42 ` Steven Rostedt
  2009-03-17 12:54   ` [PATCH 1/2] ftrace: protect running nmi Lai Jiangshan
  0 siblings, 1 reply; 10+ messages in thread
From: Steven Rostedt @ 2009-03-16 17:42 UTC (permalink / raw)
  To: Lai Jiangshan; +Cc: Ingo Molnar, LKML


On Mon, 2009-03-16 at 20:54 +0800, Lai Jiangshan wrote:
> When I review the sensitive code ftrace_nmi_enter(), I found
> the atomic variable nmi_running does protect NMI VS do_ftrace_mod_code(),
> but it can not protects NMI(entered nmi) VS NMI(ftrace_nmi_enter()).
> 
> cpu#1                   | cpu#2                 | cpu#3
> ftrace_nmi_enter()      | do_ftrace_mod_code()  |
>   not modify            |                       |
> ------------------------|-----------------------|--
> executing               | set mod_code_write = 1|
> executing             --|-----------------------|--------------------
> executing               |                       | ftrace_nmi_enter()
> executing               |                       |    do modify
> ------------------------|-----------------------|-----------------
> ftrace_nmi_exit()       |                       |

Very good review!

This race is possible, although very unlikely, but must be fixed
regardless.

> 
> cpu#3 may be being modified the code which is still being executed on cpu#1,
> it will have undefined results and possibly take a GPF, this patch
> prevents it occurred.

Unfortunately your patch does not solve the problem. It only makes the
race window smaller.

> 
> Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
> ---
> diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
> index 1d0d7f4..e016f5e 100644
> --- a/arch/x86/kernel/ftrace.c
> +++ b/arch/x86/kernel/ftrace.c
> @@ -87,7 +87,8 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
>   *
>   * If an NMI is executed, the first thing it does is to call
>   * "ftrace_nmi_enter". This will check if the flag is set to write
> - * and if it is, it will write what is in the IP and "code" buffers.
> + * and if it is, and there is no executing nmi, it will write
> + * what is in the IP and "code" buffers.
>   *
>   * The trick is, it does not matter if everyone is writing the same
>   * content to the code location. Also, if a CPU is executing code
> @@ -96,6 +97,7 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
>   */
>  
>  static atomic_t nmi_running = ATOMIC_INIT(0);
> +static atomic_t nmi_executing = ATOMIC_INIT(0);
>  static int mod_code_status;		/* holds return value of text write */
>  static int mod_code_write;		/* set when NMI should do the write */
>  static void *mod_code_ip;		/* holds the IP to write to */
> @@ -135,14 +137,18 @@ void ftrace_nmi_enter(void)
>  	atomic_inc(&nmi_running);
>  	/* Must have nmi_running seen before reading write flag */
>  	smp_mb();
> -	if (mod_code_write) {
> +	if (!atomic_read(&nmi_executing) && mod_code_write) {
>  		ftrace_mod_code();
>  		atomic_inc(&nmi_update_count);
>  	}

Here we have another race window. If cpu#1 has that NMI and right here
we get a SMI (something to make the race window bigger). cpu#2 could
have set the mod_code_write and cpu#3 could have another NMI that sees
it but does not see the nmi_executing flag. Now we are in the same
scenario as you nicely described up above.

> +	atomic_inc(&nmi_executing);
> +	smp_mb();
>  }
>  
>  void ftrace_nmi_exit(void)
>  {
> +	smp_mb();
> +	atomic_dec(&nmi_executing);
>  	/* Finish all executions before clearing nmi_running */
>  	smp_wmb();
>  	atomic_dec(&nmi_running);
> 


The solution is to connect the mod_code_write with the nmi_enter and
nmi_exit. Make mod_code_write an atomic.

void ftrace_nmi_enter(void)
{	
	if (atomic_inc_return(&mod_code_write) > 10000) {
		ftrace_mod_code();
		atomic_inc(&nmi_update_count);
	}
	smp_mb();
}

void ftrace_nmi_exit(void)
{
	smp_mb();
	atomic_dec(&mod_code_write);
}

Then in do_ftrace_mod_code ...


	while (atomic_cmpxchg(&mod_code_write, 0, 10001) != 0)
		;

	[...]


	while (atomic_cmpxchg(&mode_code_write, 10001, 0) != 10001)
		;


Does this look like it would solve the issue?

-- Steve



^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH 1/2] ftrace: protect running nmi
  2009-03-16 17:42 ` Steven Rostedt
@ 2009-03-17 12:54   ` Lai Jiangshan
  2009-03-17 12:58     ` [PATCH 2/2] ftrace: fast path for do_ftrace_mod_code() Lai Jiangshan
  2009-03-18  8:42     ` [PATCH] ftrace: protect running nmi (V3) Lai Jiangshan
  0 siblings, 2 replies; 10+ messages in thread
From: Lai Jiangshan @ 2009-03-17 12:54 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: Ingo Molnar, LKML

Steven Rostedt wrote:
>>  	atomic_inc(&nmi_running);
>>  	/* Must have nmi_running seen before reading write flag */
>>  	smp_mb();
>> -	if (mod_code_write) {
>> +	if (!atomic_read(&nmi_executing) && mod_code_write) {
>>  		ftrace_mod_code();
>>  		atomic_inc(&nmi_update_count);
>>  	}
> 
> Here we have another race window. If cpu#1 has that NMI and right here
> we get a SMI (something to make the race window bigger). cpu#2 could
> have set the mod_code_write and cpu#3 could have another NMI that sees
> it but does not see the nmi_executing flag. Now we are in the same
> scenario as you nicely described up above.

I missed this window.

> void ftrace_nmi_enter(void)
> {	
> 	if (atomic_inc_return(&mod_code_write) > 10000) {
> 		ftrace_mod_code();
> 		atomic_inc(&nmi_update_count);
> 	}
> 	smp_mb();
> }
> 
> void ftrace_nmi_exit(void)
> {
> 	smp_mb();
> 	atomic_dec(&mod_code_write);
> }
> 
> Then in do_ftrace_mod_code ...
> 
> 
> 	while (atomic_cmpxchg(&mod_code_write, 0, 10001) != 0)
> 		;
> 
> 	[...]
> 
> 
> 	while (atomic_cmpxchg(&mode_code_write, 10001, 0) != 10001)
> 		;
> 
> 
> Does this look like it would solve the issue?
> 

It's very nice. The write-flag and the counter are put into one atomic
variable together. The write-flag is changed only when there is no running NMI.
So if any NMI sees this flag, all other running NMIs also saw this flag
when they entered.


Subject: [PATCH 1/2] ftrace: protect running nmi (V2)

When I reviewed the sensitive code ftrace_nmi_enter(), I found that
the atomic variable nmi_running does protect NMI VS do_ftrace_mod_code(),
but it cannot protect NMI(entered nmi) VS NMI(ftrace_nmi_enter()).

cpu#1                   | cpu#2                 | cpu#3
ftrace_nmi_enter()      | do_ftrace_mod_code()  |
  not modify            |                       |
------------------------|-----------------------|--
executing               | set mod_code_write = 1|
executing             --|-----------------------|--------------------
executing               |                       | ftrace_nmi_enter()
executing               |                       |    do modify
------------------------|-----------------------|-----------------
ftrace_nmi_exit()       |                       |

cpu#3 may be modifying the code which is still being executed on cpu#1;
this will have undefined results and possibly take a GPF. This patch
prevents that from occurring.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
---
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 1d0d7f4..699a1c0 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -79,11 +79,11 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
  *
  * 1) Put the instruction pointer into the IP buffer
  *    and the new code into the "code" buffer.
- * 2) Set a flag that says we are modifying code
- * 3) Wait for any running NMIs to finish.
- * 4) Write the code
- * 5) clear the flag.
- * 6) Wait for any running NMIs to finish.
+ * 2) Wait for any running NMIs to finish and set a flag that says
+ *    we are modifying code, it is done in an atomic operation.
+ * 3) Write the code
+ * 4) clear the flag and wait for any running NMIs to finish,
+ *    it is also done in an atomic operation.
  *
  * If an NMI is executed, the first thing it does is to call
  * "ftrace_nmi_enter". This will check if the flag is set to write
@@ -95,9 +95,9 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
  * are the same as what exists.
  */
 
+#define MOD_CODE_WRITE_FLAG (1 << 31)	/* set when NMI should do the write */
 static atomic_t nmi_running = ATOMIC_INIT(0);
 static int mod_code_status;		/* holds return value of text write */
-static int mod_code_write;		/* set when NMI should do the write */
 static void *mod_code_ip;		/* holds the IP to write to */
 static void *mod_code_newcode;		/* holds the text to write to the IP */
 
@@ -124,40 +124,36 @@ static void ftrace_mod_code(void)
 	 */
 	mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
 					     MCOUNT_INSN_SIZE);
-
-	/* if we fail, then kill any new writers */
-	if (mod_code_status)
-		mod_code_write = 0;
 }
 
 void ftrace_nmi_enter(void)
 {
-	atomic_inc(&nmi_running);
-	/* Must have nmi_running seen before reading write flag */
-	smp_mb();
-	if (mod_code_write) {
+	if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) {
+		smp_rmb();
 		ftrace_mod_code();
 		atomic_inc(&nmi_update_count);
 	}
+	/* Must have previous changes seen before executions */
+	smp_mb();
 }
 
 void ftrace_nmi_exit(void)
 {
 	/* Finish all executions before clearing nmi_running */
-	smp_wmb();
+	smp_mb();
 	atomic_dec(&nmi_running);
 }
 
-static void wait_for_nmi(void)
+static void wait_and_set(int wait_val, int set_val)
 {
-	if (!atomic_read(&nmi_running))
-		return;
+	int wait = 0;
 
-	do {
+	while (atomic_cmpxchg(&nmi_running, wait_val, set_val) != wait_val) {
+		wait = 1;
 		cpu_relax();
-	} while (atomic_read(&nmi_running));
+	}
 
-	nmi_wait_count++;
+	nmi_wait_count += wait;
 }
 
 static int
@@ -166,15 +162,13 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
 	mod_code_ip = (void *)ip;
 	mod_code_newcode = new_code;
 
-	/* The buffers need to be visible before we let NMIs write them */
-	smp_wmb();
-
-	mod_code_write = 1;
-
-	/* Make sure write bit is visible before we wait on NMIs */
+	/*
+	 * The previous variables need to be visible before NMIs sees
+	 * the MOD_CODE_WRITE_FLAG.
+	 */
 	smp_mb();
 
-	wait_for_nmi();
+	wait_and_set(0, MOD_CODE_WRITE_FLAG);
 
 	/* Make sure all running NMIs have finished before we write the code */
 	smp_mb();
@@ -182,14 +176,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
 	ftrace_mod_code();
 
 	/* Make sure the write happens before clearing the bit */
-	smp_wmb();
-
-	mod_code_write = 0;
-
-	/* make sure NMIs see the cleared bit */
 	smp_mb();
 
-	wait_for_nmi();
+	wait_and_set(MOD_CODE_WRITE_FLAG, 0);
 
 	return mod_code_status;
 }






^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 2/2] ftrace: fast path for do_ftrace_mod_code()
  2009-03-17 12:54   ` [PATCH 1/2] ftrace: protect running nmi Lai Jiangshan
@ 2009-03-17 12:58     ` Lai Jiangshan
  2009-03-17 14:39       ` Steven Rostedt
  2009-03-18  8:42     ` [PATCH] ftrace: protect running nmi (V3) Lai Jiangshan
  1 sibling, 1 reply; 10+ messages in thread
From: Lai Jiangshan @ 2009-03-17 12:58 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: Ingo Molnar, LKML

Lai Jiangshan wrote:
> 
> Subject: [PATCH 1/2] ftrace: protect running nmi (V2)
> 


Subject: [PATCH 2/2] ftrace: fast path for do_ftrace_mod_code()

commit 90c7ac49aa819feb9433b5310089fca6399881c0
adds a fast path to prevent NMI lockup.

But the previous patch "protect executing nmi" changes
do_ftrace_mod_code()'s implementation, we still need fix to
prevent NMI lockup by adding a fast path.

A difference between this fix and 90c7ac49aa819feb9433b5310089fca6399881c0
is that: We kill any new writers in spite of probe_kernel_write()
success or fail, not only when probe_kernel_write() fail.
(When probe_kernel_write() success, new writers do not need to do
it again.)

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
---
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 699a1c0..61cb520 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -98,6 +98,7 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
 #define MOD_CODE_WRITE_FLAG (1 << 31)	/* set when NMI should do the write */
 static atomic_t nmi_running = ATOMIC_INIT(0);
 static int mod_code_status;		/* holds return value of text write */
+static int mod_code_no_write = 1;	/* set when NMI not need do the write */
 static void *mod_code_ip;		/* holds the IP to write to */
 static void *mod_code_newcode;		/* holds the text to write to the IP */
 
@@ -124,14 +125,19 @@ static void ftrace_mod_code(void)
 	 */
 	mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
 					     MCOUNT_INSN_SIZE);
+
+	smb_wmb();
+	mod_code_no_write = 1;
 }
 
 void ftrace_nmi_enter(void)
 {
 	if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) {
 		smp_rmb();
-		ftrace_mod_code();
-		atomic_inc(&nmi_update_count);
+		if (!mod_code_no_write) {
+			ftrace_mod_code();
+			atomic_inc(&nmi_update_count);
+		}
 	}
 	/* Must have previous changes seen before executions */
 	smp_mb();
@@ -161,6 +167,7 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
 {
 	mod_code_ip = (void *)ip;
 	mod_code_newcode = new_code;
+	mod_code_no_write = 0;
 
 	/*
 	 * The previous variables need to be visible before NMIs sees
@@ -173,7 +180,8 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
 	/* Make sure all running NMIs have finished before we write the code */
 	smp_mb();
 
-	ftrace_mod_code();
+	if (!mod_code_no_write)
+		ftrace_mod_code();
 
 	/* Make sure the write happens before clearing the bit */
 	smp_mb();




^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH 2/2] ftrace: fast path for do_ftrace_mod_code()
  2009-03-17 12:58     ` [PATCH 2/2] ftrace: fast path for do_ftrace_mod_code() Lai Jiangshan
@ 2009-03-17 14:39       ` Steven Rostedt
  2009-03-18  7:02         ` Lai Jiangshan
  0 siblings, 1 reply; 10+ messages in thread
From: Steven Rostedt @ 2009-03-17 14:39 UTC (permalink / raw)
  To: Lai Jiangshan; +Cc: Ingo Molnar, LKML


On Tue, 2009-03-17 at 20:58 +0800, Lai Jiangshan wrote:
> Lai Jiangshan wrote:
> > 
> > Subject: [PATCH 1/2] ftrace: protect running nmi (V2)
> > 
> 
> 
> Subject: [PATCH 2/2] ftrace: fast path for do_ftrace_mod_code()
> 
> commit 90c7ac49aa819feb9433b5310089fca6399881c0
> adds a fast path to prevent NMI lockup.
> 
> But the previous patch "protect executing nmi" changes
> do_ftrace_mod_code()'s implementation, we still need fix to
> prevent NMI lockup by adding a fast path.
> 
> A difference between this fix and 90c7ac49aa819feb9433b5310089fca6399881c0
> is that: We kill any new writers in spite of probe_kernel_write()
> success or fail, not only when probe_kernel_write() fail.
> (When probe_kernel_write() success, new writers do not need to do
> it again.)

I'm a bit nervous about this code. We do not get much benefit from it,
because the NMI case is an anomaly, and is not a fast path anyway. This
code only happens when we are running the stop_machine, and this adds
added complexity for little benefit.

The original patch was to prevent an actual live lock I got in one of my
tests. The problem was that the failure of the write caused a printk
stack dump. But the time it took the print to go out over the serial was
long enough that the next NMI triggered when it finished. The new NMI
hit the same error and did another print. Thus, all I got was a lot of
prints out over the serial, but the system was dead.

I like the first patch, but you removed the protection there. It should
have been in this patch, and it should have still added the
functionality of the previous method.

> 
> Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
> ---
> diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
> index 699a1c0..61cb520 100644
> --- a/arch/x86/kernel/ftrace.c
> +++ b/arch/x86/kernel/ftrace.c
> @@ -98,6 +98,7 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
>  #define MOD_CODE_WRITE_FLAG (1 << 31)	/* set when NMI should do the write */
>  static atomic_t nmi_running = ATOMIC_INIT(0);
>  static int mod_code_status;		/* holds return value of text write */
> +static int mod_code_no_write = 1;	/* set when NMI not need do the write */
>  static void *mod_code_ip;		/* holds the IP to write to */
>  static void *mod_code_newcode;		/* holds the text to write to the IP */
>  
> @@ -124,14 +125,19 @@ static void ftrace_mod_code(void)
>  	 */
>  	mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode,
>  					     MCOUNT_INSN_SIZE);
> +
> +	smb_wmb();

I still rather have this only set when mod_code_status fails.

> +	mod_code_no_write = 1;
>  }
>  
>  void ftrace_nmi_enter(void)
>  {
>  	if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) {
>  		smp_rmb();
> -		ftrace_mod_code();
> -		atomic_inc(&nmi_update_count);
> +		if (!mod_code_no_write) {
> +			ftrace_mod_code();
> +			atomic_inc(&nmi_update_count);
> +		}
>  	}
>  	/* Must have previous changes seen before executions */
>  	smp_mb();
> @@ -161,6 +167,7 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
>  {
>  	mod_code_ip = (void *)ip;
>  	mod_code_newcode = new_code;
> +	mod_code_no_write = 0;

Here's another issue, if mod_code_status failed, we do not want to have
mod_code_no_write become zero again. The logic may indeed prevent this,
but I rather have the logic be straight forward, and just set this to
one when we have a failure and forget about it. Yes, it is a bit more
expensive, but it makes the code clearer.

-- Steve

>  
>  	/*
>  	 * The previous variables need to be visible before NMIs sees
> @@ -173,7 +180,8 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
>  	/* Make sure all running NMIs have finished before we write the code */
>  	smp_mb();
>  
> -	ftrace_mod_code();
> +	if (!mod_code_no_write)
> +		ftrace_mod_code();
>  
>  	/* Make sure the write happens before clearing the bit */
>  	smp_mb();
> 
> 
> 


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 2/2] ftrace: fast path for do_ftrace_mod_code()
  2009-03-17 14:39       ` Steven Rostedt
@ 2009-03-18  7:02         ` Lai Jiangshan
  0 siblings, 0 replies; 10+ messages in thread
From: Lai Jiangshan @ 2009-03-18  7:02 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: Ingo Molnar, LKML

Steven Rostedt wrote:
> 
> I'm a bit nervous about this code. We do not get much benefit from it,
> because the NMI case is an anomaly, and is not a fast path anyway. This
> code only happens when we are running the stop_machine, and this adds
> added complexity for little benefit.
> 
> The original patch was to prevent an actual live lock I got in one of my
> tests. The problem was that the failure of the write caused a printk
> stack dump. But the time it took the print to go out over the serial was
> long enough that the next NMI triggered when it finished. The new NMI
> hit the same error and did another print. Thus, all I got was a lot of
> prints out over the serial, but the system was dead.
> 

Thank you. I understand.


> I like the first patch. but you remove the protection there. It should
> have been in this patch. But it should have still added the
> functionality of the previous method.

I separated it into two parts; I thought it would be good for review.
But I wrote two bad patches.

>> @@ -161,6 +167,7 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
>>  {
>>  	mod_code_ip = (void *)ip;
>>  	mod_code_newcode = new_code;
>> +	mod_code_no_write = 0;
> 
> Here's another issue, if mod_code_status failed, we do not want to have
> mod_code_no_write become zero again. The logic may indeed prevent this,
> but I rather have the logic be straight forward, and just set this to
> one when we have a failure and forget about it. Yes, it is a bit more
> expensive, but it makes the code clearer.

It confused me.

do_ftrace_mod_code() is called sequentially, so mod_code_no_write should
become zero again in new calls.

Unlike the old code, when the first patch is applied, there is no NMI
attempting to call probe_kernel_write() when we just enter
do_ftrace_mod_code(), so setting mod_code_no_write to 0 is safe.
(Because the flag is not set.)

Lai.



^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH] ftrace: protect running nmi (V3)
  2009-03-17 12:54   ` [PATCH 1/2] ftrace: protect running nmi Lai Jiangshan
  2009-03-17 12:58     ` [PATCH 2/2] ftrace: fast path for do_ftrace_mod_code() Lai Jiangshan
@ 2009-03-18  8:42     ` Lai Jiangshan
  2009-03-19  0:33       ` Steven Rostedt
  2009-03-20 10:18       ` [tip:tracing/ftrace] " Lai Jiangshan
  1 sibling, 2 replies; 10+ messages in thread
From: Lai Jiangshan @ 2009-03-18  8:42 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: Ingo Molnar, LKML


When I reviewed the sensitive code ftrace_nmi_enter(), I found that
the atomic variable nmi_running does protect NMI VS do_ftrace_mod_code(),
but it cannot protect NMI(entered nmi) VS NMI(ftrace_nmi_enter()).

cpu#1                   | cpu#2                 | cpu#3
ftrace_nmi_enter()      | do_ftrace_mod_code()  |
  not modify            |                       |
------------------------|-----------------------|--
executing               | set mod_code_write = 1|
executing             --|-----------------------|--------------------
executing               |                       | ftrace_nmi_enter()
executing               |                       |    do modify
------------------------|-----------------------|-----------------
ftrace_nmi_exit()       |                       |

cpu#3 may be modifying the code which is still being executed on cpu#1;
this will have undefined results and possibly take a GPF. This patch
prevents that from occurring.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
---
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 1d0d7f4..0edb5c2 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -79,11 +79,11 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
  *
  * 1) Put the instruction pointer into the IP buffer
  *    and the new code into the "code" buffer.
- * 2) Set a flag that says we are modifying code
- * 3) Wait for any running NMIs to finish.
- * 4) Write the code
- * 5) clear the flag.
- * 6) Wait for any running NMIs to finish.
+ * 2) Wait for any running NMIs to finish and set a flag that says
+ *    we are modifying code, it is done in an atomic operation.
+ * 3) Write the code
+ * 4) clear the flag.
+ * 5) Wait for any running NMIs to finish.
  *
  * If an NMI is executed, the first thing it does is to call
  * "ftrace_nmi_enter". This will check if the flag is set to write
@@ -95,9 +95,9 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
  * are the same as what exists.
  */
 
+#define MOD_CODE_WRITE_FLAG (1 << 31)	/* set when NMI should do the write */
 static atomic_t nmi_running = ATOMIC_INIT(0);
 static int mod_code_status;		/* holds return value of text write */
-static int mod_code_write;		/* set when NMI should do the write */
 static void *mod_code_ip;		/* holds the IP to write to */
 static void *mod_code_newcode;		/* holds the text to write to the IP */
 
@@ -114,6 +114,20 @@ int ftrace_arch_read_dyn_info(char *buf, int size)
 	return r;
 }
 
+static void clear_mod_flag(void)
+{
+	int old = atomic_read(&nmi_running);
+
+	for (;;) {
+		int new = old & ~MOD_CODE_WRITE_FLAG;
+
+		if (old == new)
+			break;
+
+		old = atomic_cmpxchg(&nmi_running, old, new);
+	}
+}
+
 static void ftrace_mod_code(void)
 {
 	/*
@@ -127,27 +141,39 @@ static void ftrace_mod_code(void)
 
 	/* if we fail, then kill any new writers */
 	if (mod_code_status)
-		mod_code_write = 0;
+		clear_mod_flag();
 }
 
 void ftrace_nmi_enter(void)
 {
-	atomic_inc(&nmi_running);
-	/* Must have nmi_running seen before reading write flag */
-	smp_mb();
-	if (mod_code_write) {
+	if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) {
+		smp_rmb();
 		ftrace_mod_code();
 		atomic_inc(&nmi_update_count);
 	}
+	/* Must have previous changes seen before executions */
+	smp_mb();
 }
 
 void ftrace_nmi_exit(void)
 {
 	/* Finish all executions before clearing nmi_running */
-	smp_wmb();
+	smp_mb();
 	atomic_dec(&nmi_running);
 }
 
+static void wait_for_nmi_and_set_mod_flag(void)
+{
+	if (!atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG))
+		rerurn;
+
+	do {
+		cpu_relax();
+	} while (atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG));
+
+	nmi_wait_count++;
+}
+
 static void wait_for_nmi(void)
 {
 	if (!atomic_read(&nmi_running))
@@ -167,14 +193,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
 	mod_code_newcode = new_code;
 
 	/* The buffers need to be visible before we let NMIs write them */
-	smp_wmb();
-
-	mod_code_write = 1;
-
-	/* Make sure write bit is visible before we wait on NMIs */
 	smp_mb();
 
-	wait_for_nmi();
+	wait_for_nmi_and_set_mod_flag();
 
 	/* Make sure all running NMIs have finished before we write the code */
 	smp_mb();
@@ -182,13 +203,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
 	ftrace_mod_code();
 
 	/* Make sure the write happens before clearing the bit */
-	smp_wmb();
-
-	mod_code_write = 0;
-
-	/* make sure NMIs see the cleared bit */
 	smp_mb();
 
+	clear_mod_flag();
 	wait_for_nmi();
 
 	return mod_code_status;


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH] ftrace: protect running nmi (V3)
  2009-03-18  8:42     ` [PATCH] ftrace: protect running nmi (V3) Lai Jiangshan
@ 2009-03-19  0:33       ` Steven Rostedt
  2009-03-19  2:02         ` Lai Jiangshan
  2009-03-20 10:18       ` [tip:tracing/ftrace] " Lai Jiangshan
  1 sibling, 1 reply; 10+ messages in thread
From: Steven Rostedt @ 2009-03-19  0:33 UTC (permalink / raw)
  To: Lai Jiangshan; +Cc: Ingo Molnar, LKML


On Wed, 2009-03-18 at 16:42 +0800, Lai Jiangshan wrote:

>  
> +static void wait_for_nmi_and_set_mod_flag(void)
> +{
> +	if (!atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG))
> +		rerurn;

You might want to compile check patches before sending. I'll look this
patch over in detail, and make the necessary fixes.

-- Steve

> +
> +	do {
> +		cpu_relax();
> +	} while (atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG));
> +
> +	nmi_wait_count++;
> +}
> +


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] ftrace: protect running nmi (V3)
  2009-03-19  0:33       ` Steven Rostedt
@ 2009-03-19  2:02         ` Lai Jiangshan
  0 siblings, 0 replies; 10+ messages in thread
From: Lai Jiangshan @ 2009-03-19  2:02 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: Ingo Molnar, LKML

Steven Rostedt wrote:
> On Wed, 2009-03-18 at 16:42 +0800, Lai Jiangshan wrote:
> 
>>  
>> +static void wait_for_nmi_and_set_mod_flag(void)
>> +{
>> +	if (!atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG))
>> +		rerurn;
> 
> You might want to compile check patches before sending. I'll look this
> patch over in detail, and make the necessary fixes.
> 

Sorry for it.
(I set CONFIG_DYNAMIC_FTRACE=n temporarily yesterday, ^_^)

Subject: [PATCH] ftrace: protect running nmi (V3)

When I reviewed the sensitive code ftrace_nmi_enter(), I found that
the atomic variable nmi_running does protect NMI VS do_ftrace_mod_code(),
but it cannot protect NMI(entered nmi) VS NMI(ftrace_nmi_enter()).

cpu#1                   | cpu#2                 | cpu#3
ftrace_nmi_enter()      | do_ftrace_mod_code()  |
  not modify            |                       |
------------------------|-----------------------|--
executing               | set mod_code_write = 1|
executing             --|-----------------------|--------------------
executing               |                       | ftrace_nmi_enter()
executing               |                       |    do modify
------------------------|-----------------------|-----------------
ftrace_nmi_exit()       |                       |

cpu#3 may be modifying the code which is still being executed on cpu#1;
this will have undefined results and possibly take a GPF. This patch
prevents that from occurring.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
---
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 1d0d7f4..f0c7fad 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -79,11 +79,11 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
  *
  * 1) Put the instruction pointer into the IP buffer
  *    and the new code into the "code" buffer.
- * 2) Set a flag that says we are modifying code
- * 3) Wait for any running NMIs to finish.
- * 4) Write the code
- * 5) clear the flag.
- * 6) Wait for any running NMIs to finish.
+ * 2) Wait for any running NMIs to finish and set a flag that says
+ *    we are modifying code, it is done in an atomic operation.
+ * 3) Write the code
+ * 4) clear the flag.
+ * 5) Wait for any running NMIs to finish.
  *
  * If an NMI is executed, the first thing it does is to call
  * "ftrace_nmi_enter". This will check if the flag is set to write
@@ -95,9 +95,9 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
  * are the same as what exists.
  */
 
+#define MOD_CODE_WRITE_FLAG (1 << 31)	/* set when NMI should do the write */
 static atomic_t nmi_running = ATOMIC_INIT(0);
 static int mod_code_status;		/* holds return value of text write */
-static int mod_code_write;		/* set when NMI should do the write */
 static void *mod_code_ip;		/* holds the IP to write to */
 static void *mod_code_newcode;		/* holds the text to write to the IP */
 
@@ -114,6 +114,19 @@ int ftrace_arch_read_dyn_info(char *buf, int size)
 	return r;
 }
 
+static void clear_mod_flag(void)
+{
+	for (;;) {
+		int old = atomic_read(&nmi_running);
+		int new = old & ~MOD_CODE_WRITE_FLAG;
+
+		if (old == new)
+			break;
+
+		old = atomic_cmpxchg(&nmi_running, old, new);
+	}
+}
+
 static void ftrace_mod_code(void)
 {
 	/*
@@ -127,27 +140,39 @@ static void ftrace_mod_code(void)
 
 	/* if we fail, then kill any new writers */
 	if (mod_code_status)
-		mod_code_write = 0;
+		clear_mod_flag();
 }
 
 void ftrace_nmi_enter(void)
 {
-	atomic_inc(&nmi_running);
-	/* Must have nmi_running seen before reading write flag */
-	smp_mb();
-	if (mod_code_write) {
+	if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) {
+		smp_rmb();
 		ftrace_mod_code();
 		atomic_inc(&nmi_update_count);
 	}
+	/* Must have previous changes seen before executions */
+	smp_mb();
 }
 
 void ftrace_nmi_exit(void)
 {
 	/* Finish all executions before clearing nmi_running */
-	smp_wmb();
+	smp_mb();
 	atomic_dec(&nmi_running);
 }
 
+static void wait_for_nmi_and_set_mod_flag(void)
+{
+	if (!atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG))
+		return;
+
+	do {
+		cpu_relax();
+	} while (atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG));
+
+	nmi_wait_count++;
+}
+
 static void wait_for_nmi(void)
 {
 	if (!atomic_read(&nmi_running))
@@ -167,14 +192,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
 	mod_code_newcode = new_code;
 
 	/* The buffers need to be visible before we let NMIs write them */
-	smp_wmb();
-
-	mod_code_write = 1;
-
-	/* Make sure write bit is visible before we wait on NMIs */
 	smp_mb();
 
-	wait_for_nmi();
+	wait_for_nmi_and_set_mod_flag();
 
 	/* Make sure all running NMIs have finished before we write the code */
 	smp_mb();
@@ -182,13 +202,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
 	ftrace_mod_code();
 
 	/* Make sure the write happens before clearing the bit */
-	smp_wmb();
-
-	mod_code_write = 0;
-
-	/* make sure NMIs see the cleared bit */
 	smp_mb();
 
+	clear_mod_flag();
 	wait_for_nmi();
 
 	return mod_code_status;


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [tip:tracing/ftrace] ftrace: protect running nmi (V3)
  2009-03-18  8:42     ` [PATCH] ftrace: protect running nmi (V3) Lai Jiangshan
  2009-03-19  0:33       ` Steven Rostedt
@ 2009-03-20 10:18       ` Lai Jiangshan
  1 sibling, 0 replies; 10+ messages in thread
From: Lai Jiangshan @ 2009-03-20 10:18 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: linux-kernel, hpa, mingo, srostedt, tglx, laijs

Commit-ID:  e9d9df44736d116726f4596f7e2f9ce2764ffc0a
Gitweb:     http://git.kernel.org/tip/e9d9df44736d116726f4596f7e2f9ce2764ffc0a
Author:     Lai Jiangshan <laijs@cn.fujitsu.com>
AuthorDate: Wed, 18 Mar 2009 16:42:57 +0800
Committer:  Steven Rostedt <srostedt@redhat.com>
CommitDate: Wed, 18 Mar 2009 20:36:59 -0400

ftrace: protect running nmi (V3)

While reviewing the sensitive code in ftrace_nmi_enter(), I found that
the atomic variable nmi_running does protect NMI vs. do_ftrace_mod_code(),
but it cannot protect an already-entered NMI against an NMI that is just
executing ftrace_nmi_enter().

cpu#1                   | cpu#2                 | cpu#3
ftrace_nmi_enter()      | do_ftrace_mod_code()  |
  not modify            |                       |
------------------------|-----------------------|--
executing               | set mod_code_write = 1|
executing             --|-----------------------|--------------------
executing               |                       | ftrace_nmi_enter()
executing               |                       |    do modify
------------------------|-----------------------|-----------------
ftrace_nmi_exit()       |                       |

cpu#3 may modify code that is still being executed on cpu#1; this will
have undefined results and possibly cause a GPF. This patch prevents
that from occurring.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
LKML-Reference: <49C0B411.30003@cn.fujitsu.com>
Signed-off-by: Steven Rostedt <srostedt@redhat.com>


---
 arch/x86/kernel/ftrace.c |   63 +++++++++++++++++++++++++++++----------------
 1 files changed, 40 insertions(+), 23 deletions(-)

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 1d0d7f4..57b33ed 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -79,11 +79,11 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
  *
  * 1) Put the instruction pointer into the IP buffer
  *    and the new code into the "code" buffer.
- * 2) Set a flag that says we are modifying code
- * 3) Wait for any running NMIs to finish.
- * 4) Write the code
- * 5) clear the flag.
- * 6) Wait for any running NMIs to finish.
+ * 2) Wait for any running NMIs to finish and set a flag that says
+ *    we are modifying code, it is done in an atomic operation.
+ * 3) Write the code
+ * 4) clear the flag.
+ * 5) Wait for any running NMIs to finish.
  *
  * If an NMI is executed, the first thing it does is to call
  * "ftrace_nmi_enter". This will check if the flag is set to write
@@ -95,9 +95,9 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
  * are the same as what exists.
  */
 
+#define MOD_CODE_WRITE_FLAG (1 << 31)	/* set when NMI should do the write */
 static atomic_t nmi_running = ATOMIC_INIT(0);
 static int mod_code_status;		/* holds return value of text write */
-static int mod_code_write;		/* set when NMI should do the write */
 static void *mod_code_ip;		/* holds the IP to write to */
 static void *mod_code_newcode;		/* holds the text to write to the IP */
 
@@ -114,6 +114,20 @@ int ftrace_arch_read_dyn_info(char *buf, int size)
 	return r;
 }
 
+static void clear_mod_flag(void)
+{
+	int old = atomic_read(&nmi_running);
+
+	for (;;) {
+		int new = old & ~MOD_CODE_WRITE_FLAG;
+
+		if (old == new)
+			break;
+
+		old = atomic_cmpxchg(&nmi_running, old, new);
+	}
+}
+
 static void ftrace_mod_code(void)
 {
 	/*
@@ -127,27 +141,39 @@ static void ftrace_mod_code(void)
 
 	/* if we fail, then kill any new writers */
 	if (mod_code_status)
-		mod_code_write = 0;
+		clear_mod_flag();
 }
 
 void ftrace_nmi_enter(void)
 {
-	atomic_inc(&nmi_running);
-	/* Must have nmi_running seen before reading write flag */
-	smp_mb();
-	if (mod_code_write) {
+	if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) {
+		smp_rmb();
 		ftrace_mod_code();
 		atomic_inc(&nmi_update_count);
 	}
+	/* Must have previous changes seen before executions */
+	smp_mb();
 }
 
 void ftrace_nmi_exit(void)
 {
 	/* Finish all executions before clearing nmi_running */
-	smp_wmb();
+	smp_mb();
 	atomic_dec(&nmi_running);
 }
 
+static void wait_for_nmi_and_set_mod_flag(void)
+{
+	if (!atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG))
+		return;
+
+	do {
+		cpu_relax();
+	} while (atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG));
+
+	nmi_wait_count++;
+}
+
 static void wait_for_nmi(void)
 {
 	if (!atomic_read(&nmi_running))
@@ -167,14 +193,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
 	mod_code_newcode = new_code;
 
 	/* The buffers need to be visible before we let NMIs write them */
-	smp_wmb();
-
-	mod_code_write = 1;
-
-	/* Make sure write bit is visible before we wait on NMIs */
 	smp_mb();
 
-	wait_for_nmi();
+	wait_for_nmi_and_set_mod_flag();
 
 	/* Make sure all running NMIs have finished before we write the code */
 	smp_mb();
@@ -182,13 +203,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code)
 	ftrace_mod_code();
 
 	/* Make sure the write happens before clearing the bit */
-	smp_wmb();
-
-	mod_code_write = 0;
-
-	/* make sure NMIs see the cleared bit */
 	smp_mb();
 
+	clear_mod_flag();
 	wait_for_nmi();
 
 	return mod_code_status;

^ permalink raw reply related	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2009-03-20 10:19 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-03-16 12:54 [PATCH] ftrace: protect executing nmi Lai Jiangshan
2009-03-16 17:42 ` Steven Rostedt
2009-03-17 12:54   ` [PATCH 1/2] ftrace: protect running nmi Lai Jiangshan
2009-03-17 12:58     ` [PATCH 2/2] ftrace: fast path for do_ftrace_mod_code() Lai Jiangshan
2009-03-17 14:39       ` Steven Rostedt
2009-03-18  7:02         ` Lai Jiangshan
2009-03-18  8:42     ` [PATCH] ftrace: protect running nmi (V3) Lai Jiangshan
2009-03-19  0:33       ` Steven Rostedt
2009-03-19  2:02         ` Lai Jiangshan
2009-03-20 10:18       ` [tip:tracing/ftrace] " Lai Jiangshan

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).