Re: [PATCH] uprobes: use BX register for rip-relative fixups, not AX

linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* Re: [PATCH] uprobes: use BX register for rip-relative fixups, not AX
       [not found] ` <1398704774-25173-2-git-send-email-dvlasenk@redhat.com>
@ 2014-04-28 17:34   ` Oleg Nesterov
  2014-04-28 19:06     ` Denys Vlasenko
  2014-04-28 17:44   ` Denys Vlasenko
  2014-05-01  0:29   ` Jim Keniston
  2 siblings, 1 reply; 8+ messages in thread
From: Oleg Nesterov @ 2014-04-28 17:34 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: linux-kernel, Jim Keniston, Masami Hiramatsu, Srikar Dronamraju,
	Ingo Molnar

Thanks...

Again, the change in riprel_analyze() needs the review from someone
who understands the instruction decoding/encoding.

On 04/28, Denys Vlasenko wrote:
>
> Otherwise, instructions such as cmpxchg and div will be mishandled.

It seems that you are right. But it would be really great if you also
provide the test-case which proves the fix ;)

> Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
> CC: Jim Keniston <jkenisto@us.ibm.com>
> CC: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
> CC: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
> CC: Ingo Molnar <mingo@kernel.org>
> CC: Oleg Nesterov <oleg@redhat.com>
> ---
>  arch/x86/kernel/uprobes.c | 57 ++++++++++++++++++++++++++++++-----------------
>  1 file changed, 37 insertions(+), 20 deletions(-)
> 
> diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
> index ae6df8e..bc70182 100644
> --- a/arch/x86/kernel/uprobes.c
> +++ b/arch/x86/kernel/uprobes.c
> @@ -41,7 +41,7 @@
>  /* Instruction will modify TF, don't change it */
>  #define UPROBE_FIX_SETF		0x04
>  
> -#define UPROBE_FIX_RIP_AX	0x08
> +#define UPROBE_FIX_RIP_BX	0x08
>  #define UPROBE_FIX_RIP_CX	0x10
>  
>  #define	UPROBE_TRAP_NR		UINT_MAX
> @@ -282,7 +282,7 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
>  	/*
>  	 * insn_rip_relative() would have decoded rex_prefix, modrm.
>  	 * Clear REX.b bit (extension of MODRM.rm field):
> -	 * we want to encode rax/rcx, not r8/r9.
> +	 * we want to encode rbx/rcx, not r11/r9.
>  	 */
>  	if (insn->rex_prefix.nbytes) {
>  		cursor = auprobe->insn + insn_offset_rex_prefix(insn);
> @@ -296,41 +296,58 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
>  	 */
>  	cursor = auprobe->insn + insn_offset_modrm(insn);
>  	/*
> -	 * Convert from rip-relative addressing to register-relative addressing
> -	 * via a scratch register.
> +	 * Convert from rip-relative addressing
> +	 * to register-relative addressing via a scratch register.
>  	 */
>  	reg = MODRM_REG(insn);
> -	if (reg == 0) {
> +	if (reg == 3) {
>  		/*
> -		 * The register operand (if any) is either the A register
> -		 * (%rax, %eax, etc.) or (if the 0x4 bit is set in the
> -		 * REX prefix) %r8.  In any case, we know the C register
> +		 * The register operand (if any) is either the B register
> +		 * (%rbx, %ebx, etc.) or (if the 0x4 bit is set in the
> +		 * REX prefix) %r11.  In any case, we know the C register
>  		 * is NOT the register operand, so we use %rcx (register
>  		 * #1) for the scratch register.
>  		 */
>  		auprobe->def.fixups |= UPROBE_FIX_RIP_CX;
>  		/*
> -		 * Change modrm from "00 000 101" to "10 000 001". Example:
> -		 * 89 05 disp32  mov %eax,disp32(%rip) becomes
> -		 * 89 81 disp32  mov %eax,disp32(%rcx)
> +		 * Change modrm from "00 011 101" to "10 011 001". Example:
> +		 * 89 1d disp32  mov %ebx,disp32(%rip) becomes
> +		 * 89 99 disp32  mov %ebx,disp32(%rcx)
>  		 */
> -		*cursor = 0x81;
> +		*cursor = 0x99;
>  	} else {
> -		/* Use %rax (register #0) for the scratch register. */
> -		auprobe->def.fixups |= UPROBE_FIX_RIP_AX;
> +		/* Use %rbx (register #3) for the scratch register. */
> +		auprobe->def.fixups |= UPROBE_FIX_RIP_BX;
>  		/*
> -		 * Change modrm from "00 reg 101" to "10 reg 000". Example:
> +		 * Change modrm from "00 reg 101" to "10 reg 011". Example:
>  		 * 89 1d disp32  mov %edx,disp32(%rip) becomes
> -		 * 89 98 disp32  mov %edx,disp32(%rax)
> +		 * 89 9b disp32  mov %edx,disp32(%rbx)
>  		 */
> -		*cursor = (reg << 3) | 0x80;
> +		*cursor = (reg << 3) | 0x83;
>  	}
> +	/*
> +	 * Note: we can't use rax or rdx registers as scratch!
> +	 * There are 3-operand insns which use rax or rdx:rax
> +	 * as an implicit operand, _and_ they use modrm byte
> +	 * whose reg field indicates third register or opcode extension.
> +	 * In particular, these insns:
> +	 *  f7/6 r/m        div r/m
> +	 *  0f b1 r/m       cmpxchg r/m,reg
> +	 *  0f c7/1 mem     cmpxchg{8b,16b} mem
> +	 * Looking at "reg" field won't allow to detect that rax or rdx
> +	 * are in use.
> +	 *
> +	 * FIXME: handle vex-encoded 3-operand insns too:
> +	 *  c4 e2 60 f7 0d disp32  bextr %ebx,disp32(%rip),%ecx
> +	 * %ebx is encoded in the vex.vvvv field, which we don't check
> +	 * (in this example, it's in byte 60 bits 6..3).
> +	 */
>  }
>  
>  static inline unsigned long *
>  scratch_reg(struct arch_uprobe *auprobe, struct pt_regs *regs)
>  {
> -	return (auprobe->def.fixups & UPROBE_FIX_RIP_AX) ? &regs->ax : &regs->cx;
> +	return (auprobe->def.fixups & UPROBE_FIX_RIP_BX) ? &regs->bx : &regs->cx;
>  }
>  
>  /*
> @@ -339,7 +356,7 @@ scratch_reg(struct arch_uprobe *auprobe, struct pt_regs *regs)
>   */
>  static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
>  {
> -	if (auprobe->def.fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) {
> +	if (auprobe->def.fixups & (UPROBE_FIX_RIP_BX | UPROBE_FIX_RIP_CX)) {
>  		struct uprobe_task *utask = current->utask;
>  		unsigned long *sr = scratch_reg(auprobe, regs);
>  
> @@ -350,7 +367,7 @@ static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
>  
>  static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
>  {
> -	if (auprobe->def.fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) {
> +	if (auprobe->def.fixups & (UPROBE_FIX_RIP_BX | UPROBE_FIX_RIP_CX)) {
>  		struct uprobe_task *utask = current->utask;
>  		unsigned long *sr = scratch_reg(auprobe, regs);
>  
> -- 
> 1.8.1.4
> 


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] uprobes: use BX register for rip-relative fixups, not AX
       [not found] ` <1398704774-25173-2-git-send-email-dvlasenk@redhat.com>
  2014-04-28 17:34   ` [PATCH] uprobes: use BX register for rip-relative fixups, not AX Oleg Nesterov
@ 2014-04-28 17:44   ` Denys Vlasenko
  2014-05-01  0:29   ` Jim Keniston
  2 siblings, 0 replies; 8+ messages in thread
From: Denys Vlasenko @ 2014-04-28 17:44 UTC (permalink / raw)
  To: linux-kernel
  Cc: Denys Vlasenko, Jim Keniston, Masami Hiramatsu, Srikar Dronamraju,
	Ingo Molnar, Oleg Nesterov

On 04/28/2014 07:06 PM, Denys Vlasenko wrote:
> +	 * Note: we can't use rax or rdx registers as scratch!
> +	 * There are 3-operand insns which use rax or rdx:rax
> +	 * as an implicit operand, _and_ they use modrm byte
> +	 * whose reg field indicates third register or opcode extension.
> +	 * In particular, these insns:
> +	 *  f7/6 r/m        div r/m
> +	 *  0f b1 r/m       cmpxchg r/m,reg
> +	 *  0f c7/1 mem     cmpxchg{8b,16b} mem
> +	 * Looking at "reg" field won't allow to detect that rax or rdx
> +	 * are in use.

Eek.... even this is not good enough for cmpxchg8b!
The damn thing uses CX and BX too!

AMD docs say -

"""Compares the value in the rDX:rAX registers with a 64-bit
or 128-bit value in the specified memory location.
If the values are equal, the instruction copies the value
in the rCX:rBX registers to the memory location and sets
the zero flag (ZF) of the rFLAGS register to 1."""

So, my patch does fix the cases of div and cmpxchg insns,
but not cmpxchg8b.


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] uprobes: use BX register for rip-relative fixups, not AX
  2014-04-28 17:34   ` [PATCH] uprobes: use BX register for rip-relative fixups, not AX Oleg Nesterov
@ 2014-04-28 19:06     ` Denys Vlasenko
  2014-04-28 19:23       ` Oleg Nesterov
  0 siblings, 1 reply; 8+ messages in thread
From: Denys Vlasenko @ 2014-04-28 19:06 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: linux-kernel, Jim Keniston, Masami Hiramatsu, Srikar Dronamraju,
	Ingo Molnar

On 04/28/2014 07:34 PM, Oleg Nesterov wrote:
> Thanks...
> 
> Again, the change in riprel_analyze() needs the review from someone
> who understands the instruction decoding/encoding.
> 
> On 04/28, Denys Vlasenko wrote:
>>
>> Otherwise, instructions such as cmpxchg and div will be mishandled.
> 
> It seems that you are right. But it would be really great if you also
> provide the test-case which proves the fix ;)

Working on a testcase for this. So far covered div (test1)
and cmpxchg (test2).

Reproduced failure on a fairly old 3.10.11 kernel:

# gcc -Os -Wall test_riprel.c -o test_riprel
# ./test_riprel
test1: pass
test2: pass
# perf probe -x ./test_riprel probe1
# perf record -e probe_test:probe1 ./test_riprel
test1: FAIL
test2: pass
# perf probe -x ./test_riprel probe2
# perf record -e probe_test:probe2 ./test_riprel
test1: pass
test2: FAIL

Source:

test_riprel.c
==================
#include <stdio.h>

static const char *const fail_pass[] = { "FAIL", "pass" };

long two = 2;
long test1()
{
	long ax=0, dx=0;
	asm volatile("\n"
"			xor	%%edx,%%edx\n"
"			lea	2(%%edx),%%eax\n"
// We divide 2 by 2. Result (in eax) should be 1:
"	probe1:		.globl	probe1\n"
"			divl	two(%%rip)\n"
// If we have a bug (eax mangled on entry) the result will be 2,
// because eax gets restored by probe machinery.
	: "=a" (ax), "=d" (dx) /*out*/
	: "0" (ax), "1" (dx) /*in*/
	: "memory" /*clobber*/
	);
	dprintf(2, "%s: %s\n", __func__, fail_pass[ax == 1]);
	return ax;
}

long val2 = 0;
long test2()
{
	long old_val2 = val2;
	long ax=0, dx=0;
	asm volatile("\n"
"			mov	val2,%%eax\n"     // eax := val2
"			lea	1(%%eax),%%edx\n" // edx := eax+1
// eax is equal to val2. cmpxchg should store edx to val2:
"	probe2:		.globl  probe2\n"
"			cmpxchg %%edx,val2(%%rip)\n"
// If we have a bug (eax mangled on entry), val2 will stay unchanged
	: "=a" (ax), "=d" (dx) /*out*/
	: "0" (ax), "1" (dx) /*in*/
	: "memory" /*clobber*/
	);
	dprintf(2, "%s: %s\n", __func__, fail_pass[val2 == old_val2 + 1]);
	return ax == dx;
}

int main()
{
	test1();
	test2();
	return 0;
}


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] uprobes: use BX register for rip-relative fixups, not AX
  2014-04-28 19:06     ` Denys Vlasenko
@ 2014-04-28 19:23       ` Oleg Nesterov
  2014-04-29 10:16         ` Denys Vlasenko
  0 siblings, 1 reply; 8+ messages in thread
From: Oleg Nesterov @ 2014-04-28 19:23 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: linux-kernel, Jim Keniston, Masami Hiramatsu, Srikar Dronamraju,
	Ingo Molnar

On 04/28, Denys Vlasenko wrote:
>
> On 04/28/2014 07:34 PM, Oleg Nesterov wrote:
> >
> > It seems that you are right. But it would be really great if you also
> > provide the test-case which proves the fix ;)
>
> Working on a testcase for this. So far covered div (test1)
> and cmpxchg (test2).
>
> Reproduced failure on a fairly old 3.10.11 kernel:

Just in case, confirm. Reproduced on v3.14 + all recent uprobes changes.

Thanks.

> # gcc -Os -Wall test_riprel.c -o test_riprel
> # ./test_riprel
> test1: pass
> test2: pass
> # perf probe -x ./test_riprel probe1
> # perf record -e probe_test:probe1 ./test_riprel
> test1: FAIL
> test2: pass
> # perf probe -x ./test_riprel probe2
> # perf record -e probe_test:probe2 ./test_riprel
> test1: pass
> test2: FAIL
> 
> Source:
> 
> test_riprel.c
> ==================
> #include <stdio.h>
> 
> static const char *const fail_pass[] = { "FAIL", "pass" };
> 
> long two = 2;
> long test1()
> {
> 	long ax=0, dx=0;
> 	asm volatile("\n"
> "			xor	%%edx,%%edx\n"
> "			lea	2(%%edx),%%eax\n"
> // We divide 2 by 2. Result (in eax) should be 1:
> "	probe1:		.globl	probe1\n"
> "			divl	two(%%rip)\n"
> // If we have a bug (eax mangled on entry) the result will be 2,
> // because eax gets restored by probe machinery.
> 	: "=a" (ax), "=d" (dx) /*out*/
> 	: "0" (ax), "1" (dx) /*in*/
> 	: "memory" /*clobber*/
> 	);
> 	dprintf(2, "%s: %s\n", __func__, fail_pass[ax == 1]);
> 	return ax;
> }
> 
> long val2 = 0;
> long test2()
> {
> 	long old_val2 = val2;
> 	long ax=0, dx=0;
> 	asm volatile("\n"
> "			mov	val2,%%eax\n"     // eax := val2
> "			lea	1(%%eax),%%edx\n" // edx := eax+1
> // eax is equal to val2. cmpxchg should store edx to val2:
> "	probe2:		.globl  probe2\n"
> "			cmpxchg %%edx,val2(%%rip)\n"
> // If we have a bug (eax mangled on entry), val2 will stay unchanged
> 	: "=a" (ax), "=d" (dx) /*out*/
> 	: "0" (ax), "1" (dx) /*in*/
> 	: "memory" /*clobber*/
> 	);
> 	dprintf(2, "%s: %s\n", __func__, fail_pass[val2 == old_val2 + 1]);
> 	return ax == dx;
> }
> 
> int main()
> {
> 	test1();
> 	test2();
> 	return 0;
> }
> 


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] uprobes: use BX register for rip-relative fixups, not AX
  2014-04-28 19:23       ` Oleg Nesterov
@ 2014-04-29 10:16         ` Denys Vlasenko
  0 siblings, 0 replies; 8+ messages in thread
From: Denys Vlasenko @ 2014-04-29 10:16 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: linux-kernel, Jim Keniston, Masami Hiramatsu, Srikar Dronamraju,
	Ingo Molnar

On 04/28/2014 09:23 PM, Oleg Nesterov wrote:
> On 04/28, Denys Vlasenko wrote:
>>
>> On 04/28/2014 07:34 PM, Oleg Nesterov wrote:
>>>
>>> It seems that you are right. But it would be really great if you also
>>> provide the test-case which proves the fix ;)
>>
>> Working on a testcase for this. So far covered div (test1)
>> and cmpxchg (test2).

I looked thourgh docs and documented other weird insns.

Executive summary: looks like we will need to add [e]vex.b
bit clearing analogous to rex.b clearing we already have,
add checks for [e]vex.vvvv colliding with our selected
scratch reg, therefore we will need *three* candidates
for scratch reg (up from two as we do today).

So far I didn't find instructions using modrm and also
using SI register implicitly. DI register is used only
by one insn (maskmovq) and BX register is used only by
one too (cmpxchg8b).
BP is stack-segment based (may be a problem?).
AX, DX, CX are off-limits (many implicit users).
SP is unusable (it's stack pointer - think about "pop mem";
also, rsp+disp32 needs sib encoding -> insn length change).

Here's a more complete testcase which also tests cmpxchg8b
case:

test_riprel.c
=============
// What instructions have modrm bytes
// and also use registers not encoded in modrm byte?
//
//[i]div/[i]mul: implicitly use dx:ax
//
//shift ops: implicitly use cx
//
//cmpxchg: implicitly uses ax
//
//cmpxchg8/16b: implicitly uses dx:ax and bx:cx
//
//[e]vex insns: can have three operands
//IOW: they may use additional register encoded in [e]vex.vvvv
//
//mulx: implicitly uses dx.
//It's a new instruction (first appeared in Haswell, part of BMI2 instructions)
//mulx r/m,r1,r2: r1:r2 = dx * r/m
//It is vex-encoded. Example where we can use none of bx,cx,dx as scratch:
//c4 e2 63 f6 0d disp32   mulx disp32(%rip),%ebx,%ecx
//
//maskmovq: implicitly uses rdi as pointer to destination memory operand.
//MMXext insn. Encoding: 0f f7 modrm
//[v]maskmovdqu: similar SSE2/AVX insn
//Encoding: 66 0f f7 modrm (or vex-encoded analogue: c5 f9 f7 modrm).
//Stores first operand, byte-masked by second operand msb's in each byte, to (ds:rdi).
//
//AMD says it has no 3-operand form (vex.vvvv must be 1111)
//and that it can have only register operands, not mem
//(its modrm byte must have mode=11)
//However, if we don't special-case analysis of this insn,
//and if a memory-referencing modrm form will be allowed in the future,
//then we see it as "yet another modrm insn",
//can see it using register numbers colliding with our "scratch regs",
//and we may end up selecting rdi register as scrath - which
//is wrong for this insn.
//
//[v]pcmpistri: implicitly uses cx, xmm0
//[v]pcmpistrm: implicitly uses xmm0
//[v]pcmpestri: implicitly uses ax, dx, cx, xmm0
//[v]pcmpestrm: implicitly uses ax, dx, xmm0
//Evil SSE4.2 string comparison ops from hell.

#include <stdio.h>

static const char *const pass[] = { "FAIL", "pass" };

long two = 2;
void test1(void)
{
	long ax = 0, dx = 0;
	asm volatile("\n"
"			xor	%%edx,%%edx\n"
"			lea	2(%%edx),%%eax\n"
// We divide 2 by 2. Result (in eax) should be 1:
"	probe1:		.globl	probe1\n"
"			divl	two(%%rip)\n"
// If we have a bug (eax mangled on entry) the result will be 2,
// because eax gets restored by probe machinery.
	: "=a" (ax), "=d" (dx) /*out*/
	: "0" (ax), "1" (dx) /*in*/
	: "memory" /*clobber*/
	);
	dprintf(2, "%s: %s\n", __func__,
		pass[ax == 1]
	);
}

long val2 = 0;
void test2(void)
{
	long old_val = val2;
	long ax = 0, dx = 0;
	asm volatile("\n"
"			mov	val2,%%eax\n"     // eax := val2
"			lea	1(%%eax),%%edx\n" // edx := eax+1
// eax is equal to val2. cmpxchg should store edx to val2:
"	probe2:		.globl  probe2\n"
"			cmpxchg %%edx,val2(%%rip)\n"
// If we have a bug (eax mangled on entry), val2 will stay unchanged
	: "=a" (ax), "=d" (dx) /*out*/
	: "0" (ax), "1" (dx) /*in*/
	: "memory" /*clobber*/
	);
	dprintf(2, "%s: %s\n", __func__,
		pass[val2 == old_val + 1]
	);
}

long val3[2] = {0,0};
void test3(void)
{
	long old_val = val3[0];
	long ax = 0, dx = 0;
	asm volatile("\n"
"			mov	val3,%%eax\n"  // edx:eax := val3
"			mov	val3+4,%%edx\n"
"			mov	%%eax,%%ebx\n" // ecx:ebx := edx:eax + 1
"			mov	%%edx,%%ecx\n"
"			add	$1,%%ebx\n"
"			adc	$0,%%ecx\n"
// edx:eax is equal to val3. cmpxchg8b should store ecx:ebx to val3:
"	probe3:		.globl  probe3\n"
"			cmpxchg8b val3(%%rip)\n"
// If we have a bug (edx:eax mangled on entry), val3 will stay unchanged.
// If ecx:edx in mangled, val3 will get wrong value.
	: "=a" (ax), "=d" (dx) /*out*/
	: "0" (ax), "1" (dx) /*in*/
	: "cx", "bx", "memory" /*clobber*/
	);
	dprintf(2, "%s: %s\n", __func__,
		pass[val3[0] == old_val + 1 && val3[1] == 0]
	);
}

// Usage:
// gcc -Os -Wall test_riprel.c -o test_riprel
//
// perf probe -x ./test_riprel probe1
// perf record -e probe_test:probe1 ./test_riprel
// ^^^^^^^^^^^^^^^^ repeat for other probeN's
int main(int argc, char **argv)
{
	test1();
	test2();
	test3();
	return 0;
}

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v3] uprobes: simplify rip-relative handling
       [not found] <1398704774-25173-1-git-send-email-dvlasenk@redhat.com>
@ 2014-04-29 19:09 ` Oleg Nesterov
  2014-05-01  0:17 ` Jim Keniston
       [not found] ` <1398704774-25173-2-git-send-email-dvlasenk@redhat.com>
  2 siblings, 0 replies; 8+ messages in thread
From: Oleg Nesterov @ 2014-04-29 19:09 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: linux-kernel, Jim Keniston, Masami Hiramatsu, Srikar Dronamraju,
	Ingo Molnar

This version looks fine to me.

Masami, Jim, Srikar, can you ack the change in riprel_analyze() ?

On 04/28, Denys Vlasenko wrote:
>
> It is possible to replace rip-relative addressing mode
> with addressing mode of the same length: (reg+disp32).
> This eliminates the need to fix up immediate
> and correct for changing instruction length.
>
> v2: Rebased on top of Oleg's latest changes and run-tested.
> v3: Removed unnecessary cast.
> Improved comments based on Oleg's feedback.
> 
> Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
> CC: Jim Keniston <jkenisto@us.ibm.com>
> CC: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
> CC: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
> CC: Ingo Molnar <mingo@kernel.org>
> CC: Oleg Nesterov <oleg@redhat.com>
> ---
>  arch/x86/include/asm/uprobes.h |  3 ---
>  arch/x86/kernel/uprobes.c      | 61 ++++++++++++++++++------------------------
>  2 files changed, 26 insertions(+), 38 deletions(-)
> 
> diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h
> index a040d49..7be3c07 100644
> --- a/arch/x86/include/asm/uprobes.h
> +++ b/arch/x86/include/asm/uprobes.h
> @@ -50,9 +50,6 @@ struct arch_uprobe {
>  			u8	opc1;
>  		}			branch;
>  		struct {
> -#ifdef CONFIG_X86_64
> -			long	riprel_target;
> -#endif
>  			u8	fixups;
>  			u8	ilen;
>  		} 			def;
> diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
> index c229b5f..ae6df8e 100644
> --- a/arch/x86/kernel/uprobes.c
> +++ b/arch/x86/kernel/uprobes.c
> @@ -251,9 +251,9 @@ static inline bool is_64bit_mm(struct mm_struct *mm)
>   * If arch_uprobe->insn doesn't use rip-relative addressing, return
>   * immediately.  Otherwise, rewrite the instruction so that it accesses
>   * its memory operand indirectly through a scratch register.  Set
> - * def->fixups and def->riprel_target accordingly. (The contents of the
> - * scratch register will be saved before we single-step the modified
> - * instruction, and restored afterward).
> + * def->fixups accordingly. (The contents of the scratch register
> + * will be saved before we single-step the modified instruction,
> + * and restored afterward).
>   *
>   * We do this because a rip-relative instruction can access only a
>   * relatively small area (+/- 2 GB from the instruction), and the XOL
> @@ -264,9 +264,12 @@ static inline bool is_64bit_mm(struct mm_struct *mm)
>   *
>   * Some useful facts about rip-relative instructions:
>   *
> - *  - There's always a modrm byte.
> + *  - There's always a modrm byte with bit layout "00 reg 101".
>   *  - There's never a SIB byte.
>   *  - The displacement is always 4 bytes.
> + *  - REX.B=1 bit in REX prefix, which normally extends r/m field,
> + *    has no effect on rip-relative mode. It doesn't make modrm byte
> + *    with r/m=101 refer to register 1101 = R13.
>   */
>  static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
>  {
> @@ -293,9 +296,8 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
>  	 */
>  	cursor = auprobe->insn + insn_offset_modrm(insn);
>  	/*
> -	 * Convert from rip-relative addressing to indirect addressing
> -	 * via a scratch register.  Change the r/m field from 0x5 (%rip)
> -	 * to 0x0 (%rax) or 0x1 (%rcx), and squeeze out the offset field.
> +	 * Convert from rip-relative addressing to register-relative addressing
> +	 * via a scratch register.
>  	 */
>  	reg = MODRM_REG(insn);
>  	if (reg == 0) {
> @@ -307,22 +309,21 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
>  		 * #1) for the scratch register.
>  		 */
>  		auprobe->def.fixups |= UPROBE_FIX_RIP_CX;
> -		/* Change modrm from 00 000 101 to 00 000 001. */
> -		*cursor = 0x1;
> +		/*
> +		 * Change modrm from "00 000 101" to "10 000 001". Example:
> +		 * 89 05 disp32  mov %eax,disp32(%rip) becomes
> +		 * 89 81 disp32  mov %eax,disp32(%rcx)
> +		 */
> +		*cursor = 0x81;
>  	} else {
>  		/* Use %rax (register #0) for the scratch register. */
>  		auprobe->def.fixups |= UPROBE_FIX_RIP_AX;
> -		/* Change modrm from 00 xxx 101 to 00 xxx 000 */
> -		*cursor = (reg << 3);
> -	}
> -
> -	/* Target address = address of next instruction + (signed) offset */
> -	auprobe->def.riprel_target = (long)insn->length + insn->displacement.value;
> -
> -	/* Displacement field is gone; slide immediate field (if any) over. */
> -	if (insn->immediate.nbytes) {
> -		cursor++;
> -		memmove(cursor, cursor + insn->displacement.nbytes, insn->immediate.nbytes);
> +		/*
> +		 * Change modrm from "00 reg 101" to "10 reg 000". Example:
> +		 * 89 1d disp32  mov %edx,disp32(%rip) becomes
> +		 * 89 98 disp32  mov %edx,disp32(%rax)
> +		 */
> +		*cursor = (reg << 3) | 0x80;
>  	}
>  }
>  
> @@ -343,26 +344,17 @@ static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
>  		unsigned long *sr = scratch_reg(auprobe, regs);
>  
>  		utask->autask.saved_scratch_register = *sr;
> -		*sr = utask->vaddr + auprobe->def.riprel_target;
> +		*sr = utask->vaddr + auprobe->def.ilen;
>  	}
>  }
>  
> -static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs,
> -				long *correction)
> +static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
>  {
>  	if (auprobe->def.fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) {
>  		struct uprobe_task *utask = current->utask;
>  		unsigned long *sr = scratch_reg(auprobe, regs);
>  
>  		*sr = utask->autask.saved_scratch_register;
> -		/*
> -		 * The original instruction includes a displacement, and so
> -		 * is 4 bytes longer than what we've just single-stepped.
> -		 * Caller may need to apply other fixups to handle stuff
> -		 * like "jmpq *...(%rip)" and "callq *...(%rip)".
> -		 */
> -		if (correction)
> -			*correction += 4;
>  	}
>  }
>  #else /* 32-bit: */
> @@ -379,8 +371,7 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
>  static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
>  {
>  }
> -static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs,
> -					long *correction)
> +static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
>  {
>  }
>  #endif /* CONFIG_X86_64 */
> @@ -419,7 +410,7 @@ static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs
>  	struct uprobe_task *utask = current->utask;
>  	long correction = (long)(utask->vaddr - utask->xol_vaddr);
>  
> -	riprel_post_xol(auprobe, regs, &correction);
> +	riprel_post_xol(auprobe, regs);
>  	if (auprobe->def.fixups & UPROBE_FIX_IP) {
>  		regs->ip += correction;
>  	} else if (auprobe->def.fixups & UPROBE_FIX_CALL) {
> @@ -436,7 +427,7 @@ static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs
>  
>  static void default_abort_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
>  {
> -	riprel_post_xol(auprobe, regs, NULL);
> +	riprel_post_xol(auprobe, regs);
>  }
>  
>  static struct uprobe_xol_ops default_xol_ops = {
> -- 
> 1.8.1.4
> 


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v3] uprobes: simplify rip-relative handling
       [not found] <1398704774-25173-1-git-send-email-dvlasenk@redhat.com>
  2014-04-29 19:09 ` [PATCH v3] uprobes: simplify rip-relative handling Oleg Nesterov
@ 2014-05-01  0:17 ` Jim Keniston
       [not found] ` <1398704774-25173-2-git-send-email-dvlasenk@redhat.com>
  2 siblings, 0 replies; 8+ messages in thread
From: Jim Keniston @ 2014-05-01  0:17 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: linux-kernel, Masami Hiramatsu, Srikar Dronamraju, Ingo Molnar,
	Oleg Nesterov

On Mon, 2014-04-28 at 19:06 +0200, Denys Vlasenko wrote:
> It is possible to replace rip-relative addressing mode
> with addressing mode of the same length: (reg+disp32).
> This eliminates the need to fix up immediate
> and correct for changing instruction length.
> 
> v2: Rebased on top of Oleg's latest changes and run-tested.
> v3: Removed unnecessary cast.
> Improved comments based on Oleg's feedback.
> 
> Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
> CC: Jim Keniston <jkenisto@us.ibm.com>
> CC: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
> CC: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
> CC: Ingo Molnar <mingo@kernel.org>
> CC: Oleg Nesterov <oleg@redhat.com>

Thanks, Denys.
Reviewed-by: Jim Keniston <jkenisto@us.ibm.com>


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] uprobes: use BX register for rip-relative fixups, not AX
       [not found] ` <1398704774-25173-2-git-send-email-dvlasenk@redhat.com>
  2014-04-28 17:34   ` [PATCH] uprobes: use BX register for rip-relative fixups, not AX Oleg Nesterov
  2014-04-28 17:44   ` Denys Vlasenko
@ 2014-05-01  0:29   ` Jim Keniston
  2 siblings, 0 replies; 8+ messages in thread
From: Jim Keniston @ 2014-05-01  0:29 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: linux-kernel, Masami Hiramatsu, Srikar Dronamraju, Ingo Molnar,
	Oleg Nesterov

On Mon, 2014-04-28 at 19:06 +0200, Denys Vlasenko wrote:
> Otherwise, instructions such as cmpxchg and div will be mishandled.
> 
> Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
> CC: Jim Keniston <jkenisto@us.ibm.com>
> CC: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
> CC: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
> CC: Ingo Molnar <mingo@kernel.org>
> CC: Oleg Nesterov <oleg@redhat.com>
> ---
>  arch/x86/kernel/uprobes.c | 57 ++++++++++++++++++++++++++++++-----------------
>  1 file changed, 37 insertions(+), 20 deletions(-)
> 
> diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
...
> @@ -296,41 +296,58 @@ static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
>  	 */
>  	cursor = auprobe->insn + insn_offset_modrm(insn);
>  	/*
> -	 * Convert from rip-relative addressing to register-relative addressing
> -	 * via a scratch register.
> +	 * Convert from rip-relative addressing
> +	 * to register-relative addressing via a scratch register.
>  	 */

This comment looks like a regression. :-)

Looks good otherwise (setting aside your later findings about cmpxchg8b
and such -- I guess we need some way to helpfully reject rip-relative
forms of such instructions).

Reviewed-by: Jim Keniston <jkenisto@us.ibm.com>


^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2014-05-01  0:30 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <1398704774-25173-1-git-send-email-dvlasenk@redhat.com>
2014-04-29 19:09 ` [PATCH v3] uprobes: simplify rip-relative handling Oleg Nesterov
2014-05-01  0:17 ` Jim Keniston
     [not found] ` <1398704774-25173-2-git-send-email-dvlasenk@redhat.com>
2014-04-28 17:34   ` [PATCH] uprobes: use BX register for rip-relative fixups, not AX Oleg Nesterov
2014-04-28 19:06     ` Denys Vlasenko
2014-04-28 19:23       ` Oleg Nesterov
2014-04-29 10:16         ` Denys Vlasenko
2014-04-28 17:44   ` Denys Vlasenko
2014-05-01  0:29   ` Jim Keniston

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).