* [PATCH] IA64 trap code 16 bytes atomic copy on montecito
@ 2006-10-31 5:55 bibo,mao
2006-10-31 6:18 ` Keith Owens
` (5 more replies)
0 siblings, 6 replies; 7+ messages in thread
From: bibo,mao @ 2006-10-31 5:55 UTC (permalink / raw)
To: linux-ia64
hi,
On IA64 kprobe can not insert trap code on slot 1 because
opcode of slot 1 crosses over two consecutive 8-bytes. On
montecito machine 16 bytes atomic operation is available,
This patch implements 16 bytes atomic copy on montecito
machine, so that kprobe can probe any slot on montecito
machine.
Any comments are welcome.
Signed-off-by: bibo, mao <bibo.mao@intel.com>
thanks
bibo,mao
arch/ia64/kernel/jprobes.S | 38 +++++++++++++++++++++++++++
arch/ia64/kernel/kprobes.c | 16 ++++++++++++----
include/asm-ia64/kprobes.h | 1 +
3 files changed, 51 insertions(+), 4 deletions(-)
-------------------------------------------------------------
diff -Nruap -X 2.6.19-rc2.org/Documentation/dontdiff 2.6.19-rc2.org/arch/ia64/kernel/jprobes.S 2.6.19-rc2/arch/ia64/kernel/jprobes.S
--- 2.6.19-rc2.org/arch/ia64/kernel/jprobes.S 2006-03-27 14:41:20.000000000 +0800
+++ 2.6.19-rc2/arch/ia64/kernel/jprobes.S 2006-10-31 12:29:14.000000000 +0800
@@ -87,3 +87,41 @@ GLOBAL_ENTRY(flush_register_stack)
br.ret.sptk.many rp
END(flush_register_stack)
+/* this function uses st16/ld16 to atomically copy one bundle
+ * to code area, it requires src address and dest address is
+ * not in UC/UCE/WC area. Currently kernel physical memory
+ * identity map is cacheable and WB, so there is no such check.
+ * input0: represents whether this cpu supports atomic
+ * st16/ld16 instruction
+ * input1: destination address of bundle copy
+ * input2: source address of bundle copy
+ * return: -1 failed, 0 succeed
+ */
+GLOBAL_ENTRY(kprobe_update_inst_bundle)
+ alloc loc0=ar.pfs,3,1,0,0
+
+ and r15=0xf,r34
+ and r14=0xf,r33
+ mov r8=-1
+ ;;
+ cmp.eq p9,p8=0,r15
+ cmp.eq p7,p6=0,r14
+(p6) br.ret.dptk.many b0
+ ;;
+ cmp4.eq p7,p6=0,r32
+(p8) br.ret.dpnt.many b0
+ ;;
+(p7) ld8 r14=[r34],8
+ mov r8=r0
+(p6) ld16 r14=[r34]
+ ;;
+(p7) st8 [r33]=r14,8
+(p6) st16 [r33]=r14
+ ;;
+(p7) ld8 r14=[r34]
+ ;;
+(p7) st8 [r33]=r14
+ nop.i 0x0
+ br.ret.sptk.many b0
+ ;;
+END(kprobe_update_inst_bundle)
diff -Nruap -X 2.6.19-rc2.org/Documentation/dontdiff 2.6.19-rc2.org/arch/ia64/kernel/kprobes.c 2.6.19-rc2/arch/ia64/kernel/kprobes.c
--- 2.6.19-rc2.org/arch/ia64/kernel/kprobes.c 2006-10-27 16:39:29.000000000 +0800
+++ 2.6.19-rc2/arch/ia64/kernel/kprobes.c 2006-10-31 13:59:52.000000000 +0800
@@ -39,6 +39,8 @@ extern void jprobe_inst_return(void);
DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
+#define ITANIUM_CPUID4_BIT_AO 2
+#define ITANIUM_CPUID4_AO (0x1UL << ITANIUM_CPUID4_BIT_AO)
enum instruction_type {A, I, M, F, B, L, X, u};
static enum instruction_type bundle_encoding[32][3] = {
@@ -284,6 +286,8 @@ static int __kprobes in_ivt_functions(un
static int __kprobes valid_kprobe_addr(int template, int slot,
unsigned long addr)
{
+ int atomic;
+
if ((slot > 2) || ((bundle_encoding[template][1] == L) && slot > 1)) {
printk(KERN_WARNING "Attempting to insert unaligned kprobe "
"at 0x%lx\n", addr);
@@ -296,7 +300,8 @@ static int __kprobes valid_kprobe_addr(i
return -EINVAL;
}
- if (slot == 1 && bundle_encoding[template][1] != L) {
+ atomic = local_cpu_data->features & ITANIUM_CPUID4_AO;
+ if (slot == 1 && !atomic && bundle_encoding[template][1] != L) {
printk(KERN_WARNING "Inserting kprobes on slot #1 "
"is not supported\n");
return -EINVAL;
@@ -460,10 +465,12 @@ void __kprobes arch_arm_kprobe(struct kp
{
unsigned long addr = (unsigned long)p->addr;
unsigned long arm_addr = addr & ~0xFULL;
+ int atomic;
+ atomic = local_cpu_data->features & ITANIUM_CPUID4_AO;
flush_icache_range((unsigned long)p->ainsn.insn,
(unsigned long)p->ainsn.insn + sizeof(kprobe_opcode_t));
- memcpy((char *)arm_addr, &p->opcode, sizeof(kprobe_opcode_t));
+ kprobe_update_inst_bundle(atomic, (void *)arm_addr, (void *)&p->opcode);
flush_icache_range(arm_addr, arm_addr + sizeof(kprobe_opcode_t));
}
@@ -471,10 +478,11 @@ void __kprobes arch_disarm_kprobe(struct
{
unsigned long addr = (unsigned long)p->addr;
unsigned long arm_addr = addr & ~0xFULL;
+ int atomic;
+ atomic = local_cpu_data->features & ITANIUM_CPUID4_AO;
/* p->ainsn.insn contains the original unaltered kprobe_opcode_t */
- memcpy((char *) arm_addr, (char *) p->ainsn.insn,
- sizeof(kprobe_opcode_t));
+ kprobe_update_inst_bundle(atomic, (void *)arm_addr, (void *) p->ainsn.insn);
flush_icache_range(arm_addr, arm_addr + sizeof(kprobe_opcode_t));
}
diff -Nruap -X 2.6.19-rc2.org/Documentation/dontdiff 2.6.19-rc2.org/include/asm-ia64/kprobes.h 2.6.19-rc2/include/asm-ia64/kprobes.h
--- 2.6.19-rc2.org/include/asm-ia64/kprobes.h 2006-10-27 16:39:34.000000000 +0800
+++ 2.6.19-rc2/include/asm-ia64/kprobes.h 2006-10-31 12:29:36.000000000 +0800
@@ -127,5 +127,6 @@ static inline void jprobe_return(void)
extern void invalidate_stacked_regs(void);
extern void flush_register_stack(void);
extern void arch_remove_kprobe(struct kprobe *p);
+extern int kprobe_update_inst_bundle(int atomic, void *desc, void *src);
#endif /* _ASM_KPROBES_H */
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] IA64 trap code 16 bytes atomic copy on montecito
2006-10-31 5:55 [PATCH] IA64 trap code 16 bytes atomic copy on montecito bibo,mao
@ 2006-10-31 6:18 ` Keith Owens
2006-10-31 7:53 ` Chen, Kenneth W
` (4 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Keith Owens @ 2006-10-31 6:18 UTC (permalink / raw)
To: linux-ia64
"bibo,mao" (on Tue, 31 Oct 2006 13:55:42 +0800) wrote:
>hi,
> On IA64 kprobe can not insert trap code on slot 1 because
>opcode of slot 1 crosses over two consecutive 8-bytes. On
>montecito machine 16 bytes atomic operation is avaiable,
>This patch implements 16 bytes atomic copy on montecito
>machine, so that kprobe can probe any slot on montecito
>machine.
> Any comments is welcome.
>
>Signed-off-by: bibo, mao <bibo.mao@intel.com>
>
>thanks
>bibo,mao
>
> arch/ia64/kernel/jprobes.S | 38 +++++++++++++++++++++++++++
> arch/ia64/kernel/kprobes.c | 16 ++++++++++++----
> include/asm-ia64/kprobes.h | 1 +
> 3 files changed, 51 insertions(+), 4 deletions(-)
>-------------------------------------------------------------
>
>diff -Nruap -X 2.6.19-rc2.org/Documentation/dontdiff 2.6.19-rc2.org/arch/ia64/kernel/jprobes.S 2.6.19-rc2/arch/ia64/kernel/jprobes.S
>--- 2.6.19-rc2.org/arch/ia64/kernel/jprobes.S 2006-03-27 14:41:20.000000000 +0800
>+++ 2.6.19-rc2/arch/ia64/kernel/jprobes.S 2006-10-31 12:29:14.000000000 +0800
>@@ -87,3 +87,41 @@ GLOBAL_ENTRY(flush_register_stack)
> br.ret.sptk.many rp
> END(flush_register_stack)
>
>+/* this function uses st16/ld16 to atomically copy one bundle
>+ * to code area, it requires src address and dest address is
>+ * not in UC/UCE/WC area. Currently kernel physical memory
>+ * identified map is cachable and WB, so there is no such check.
>+ * input0: represents whether this cpu supports atomic
>+ * st16/ld16 instruction
>+ * input1: destionation address of bundle copy
>+ * input2: source address of bundle copy
>+ * return: -1 failed, 0 succeed
Trailing whitespace in patch, on the end of the 'return:' comment.
>+ */
>+GLOBAL_ENTRY(kprobe_update_inst_bundle)
>+ alloc loc0=ar.pfs,3,1,0,0
>+
>+ and r15=0xf,r34
>+ and r14=0xf,r33
Use in0, in1, in2, not r32-34.
>+ mov r8=-1
>+ ;;
>+ cmp.eq p9,p8=0,r15
>+ cmp.eq p7,p6=0,r14
"cmp.ne p8,p0=0,r15" is more readable. You are testing for a non-zero
value, but using cmp.eq then testing the second predicate is harder to
read.
I have never understood why people code two predicates on cmp when they
only ever use one of them, it makes other coders stop and check "where
is the other predicate used?". p0 makes it explicit that the second
predicate is not used.
>+(p6) br.ret.dptk.many b0
>+ ;;
>+ cmp4.eq p7,p6=0,r32
>+(p8) br.ret.dpnt.many b0
>+ ;;
>+(p7) ld8 r14=[r34],8
>+ mov r8=r0
>+(p6) ld16 r14=[r34]
>+ ;;
>+(p7) st8 [r33]=r14,8
That st8 is not an atomic operation on an instruction slot, which
conflicts with the function's specification. It is probably cleaner to
do any non-atomic updates in C, so this routine only does ld16/st16.
That also removes the atomic input and conditional code from the
assembler function. The less that is coded in assembler, the better.
>+(p6) st16 [r33]=r14
>+ ;;
>+(p7) ld8 r14=[r34]
>+ ;;
>+(p7) st8 [r33]=r14
>+ nop.i 0x0
Why insert an explicit nop? The assembler does that for you.
>+ br.ret.sptk.many b0
>+ ;;
Trailing whitespace again.
>+END(kprobe_update_inst_bundle)
^ permalink raw reply [flat|nested] 7+ messages in thread
* RE: [PATCH] IA64 trap code 16 bytes atomic copy on montecito
2006-10-31 5:55 [PATCH] IA64 trap code 16 bytes atomic copy on montecito bibo,mao
2006-10-31 6:18 ` Keith Owens
@ 2006-10-31 7:53 ` Chen, Kenneth W
2006-10-31 8:09 ` Chen, Kenneth W
` (3 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Chen, Kenneth W @ 2006-10-31 7:53 UTC (permalink / raw)
To: linux-ia64
bibo,mao wrote on Monday, October 30, 2006 9:56 PM
> On IA64 kprobe can not insert trap code on slot 1 because
> opcode of slot 1 crosses over two consecutive 8-bytes. On
> montecito machine 16 bytes atomic operation is avaiable,
> This patch implements 16 bytes atomic copy on montecito
> machine, so that kprobe can probe any slot on montecito
> machine.
>
> +/* this function uses st16/ld16 to atomically copy one bundle
> + * to code area, it requires src address and dest address is
> + * not in UC/UCE/WC area. Currently kernel physical memory
> + * identified map is cachable and WB, so there is no such check.
> + * input0: represents whether this cpu supports atomic
> + * st16/ld16 instruction
> + * input1: destionation address of bundle copy
> + * input2: source address of bundle copy
> + * return: -1 failed, 0 succeed
> + */
> +GLOBAL_ENTRY(kprobe_update_inst_bundle)
Hmm, the description doesn't match with implementation. I'm really
confused to the purpose of this asm function. It is using a pair of
ld8/st8 or using ld16/st16 depends on cpu feature. It returns error
only on address mis-alignment. There is no atomicity in there as
claimed in the description.
> @@ -460,10 +465,12 @@ void __kprobes arch_arm_kprobe(struct kp
> {
> unsigned long addr = (unsigned long)p->addr;
> unsigned long arm_addr = addr & ~0xFULL;
> + int atomic;
>
> + atomic = local_cpu_data->features & ITANIUM_CPUID4_AO;
> flush_icache_range((unsigned long)p->ainsn.insn,
> (unsigned long)p->ainsn.insn + sizeof(kprobe_opcode_t));
> - memcpy((char *)arm_addr, &p->opcode, sizeof(kprobe_opcode_t));
> + kprobe_update_inst_bundle(atomic, (void *)arm_addr, (void *)&p->opcode);
> flush_icache_range(arm_addr, arm_addr + sizeof(kprobe_opcode_t));
> }
Return value of kprobe_update_inst_bundle() is not used here. I suggest
re-design the function prototype. What does it mean to have an error? If
it is non-fatal, then why bother return a value?
> @@ -471,10 +478,11 @@ void __kprobes arch_disarm_kprobe(struct
> {
> unsigned long addr = (unsigned long)p->addr;
> unsigned long arm_addr = addr & ~0xFULL;
> + int atomic;
>
> + atomic = local_cpu_data->features & ITANIUM_CPUID4_AO;
> /* p->ainsn.insn contains the original unaltered kprobe_opcode_t */
> - memcpy((char *) arm_addr, (char *) p->ainsn.insn,
> - sizeof(kprobe_opcode_t));
> + kprobe_update_inst_bundle(atomic, (void *)arm_addr, (void *) p->ainsn.insn);
> flush_icache_range(arm_addr, arm_addr + sizeof(kprobe_opcode_t));
> }
Same here with kprobe_update_inst_bundle().
^ permalink raw reply [flat|nested] 7+ messages in thread
* RE: [PATCH] IA64 trap code 16 bytes atomic copy on montecito
2006-10-31 5:55 [PATCH] IA64 trap code 16 bytes atomic copy on montecito bibo,mao
2006-10-31 6:18 ` Keith Owens
2006-10-31 7:53 ` Chen, Kenneth W
@ 2006-10-31 8:09 ` Chen, Kenneth W
2006-10-31 8:19 ` bibo,mao
` (2 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Chen, Kenneth W @ 2006-10-31 8:09 UTC (permalink / raw)
To: linux-ia64
Keith Owens wrote on Monday, October 30, 2006 10:18 PM
> >+GLOBAL_ENTRY(kprobe_update_inst_bundle)
> >+ alloc loc0=ar.pfs,3,1,0,0
> >+
> >+ and r15=0xf,r34
> >+ and r14=0xf,r33
>
> Use in0, in1, in2, not r32-34.
Also there is no need to resize the register stack frame here, since
this is already a leaf function and there are plenty scratch register
you can use before tap into register stack. I personally prefer not
to do alloc instruction here.
And I think it would be a lot easier if you implement an intrinsic
function, like ia64_ld16/ia64_st16 and stick them in include/asm-ia64/
gcc_intrin.h and intel_intrin.h.
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] IA64 trap code 16 bytes atomic copy on montecito
2006-10-31 5:55 [PATCH] IA64 trap code 16 bytes atomic copy on montecito bibo,mao
` (2 preceding siblings ...)
2006-10-31 8:09 ` Chen, Kenneth W
@ 2006-10-31 8:19 ` bibo,mao
2006-10-31 8:40 ` Chen, Kenneth W
2006-10-31 12:48 ` bibo,mao
5 siblings, 0 replies; 7+ messages in thread
From: bibo,mao @ 2006-10-31 8:19 UTC (permalink / raw)
To: linux-ia64
Chen, Kenneth W wrote:
> Keith Owens wrote on Monday, October 30, 2006 10:18 PM
>>> +GLOBAL_ENTRY(kprobe_update_inst_bundle)
>>> + alloc loc0=ar.pfs,3,1,0,0
>>> +
>>> + and r15=0xf,r34
>>> + and r14=0xf,r33
>> Use in0, in1, in2, not r32-34.
>
> Also there is no need to resize the register stack frame here, since
> this is already a leaf function and there are plenty scratch register
> you can use before tap into register stack. I personally prefer not
> to do alloc instruction here.
>
> And I think it would be a lot easier if you implement an intrinsic
> function, like ia64_ld16/ia64_st16 and stick them in include/asm-ia64/
> gcc_intrin.h and intel_intrin.h.
>
but then there will be inline asm in the C code, which is not beneficial
for gcc optimization; I hear that IA64 maintainers dislike inline asm.
thanks
bibo,mao
^ permalink raw reply [flat|nested] 7+ messages in thread
* RE: [PATCH] IA64 trap code 16 bytes atomic copy on montecito
2006-10-31 5:55 [PATCH] IA64 trap code 16 bytes atomic copy on montecito bibo,mao
` (3 preceding siblings ...)
2006-10-31 8:19 ` bibo,mao
@ 2006-10-31 8:40 ` Chen, Kenneth W
2006-10-31 12:48 ` bibo,mao
5 siblings, 0 replies; 7+ messages in thread
From: Chen, Kenneth W @ 2006-10-31 8:40 UTC (permalink / raw)
To: linux-ia64
Mao, Bibo wrote on Tuesday, October 31, 2006 12:19 AM
> > Also there is no need to resize the register stack frame here, since
> > this is already a leaf function and there are plenty scratch register
> > you can use before tap into register stack. I personally prefer not
> > to do alloc instruction here.
> >
> > And I think it would be a lot easier if you implement an intrinsic
> > function, like ia64_ld16/ia64_st16 and stick them in include/asm-ia64/
> > gcc_intrin.h and intel_intrin.h.
> >
>
> but there will be inline asm in c language, it is not benefit for gcc to
> optimization, I hear that IA64 hates inline asm.
We hate style like:
void foo()
{
int a;
blah()
asm("ld16 ..." :: "" ..."");
bar();
}
Because this breaks all icc builds. It's perfectly fine to add an
abstraction function that turns the above asm("") into ia64_ld16(). For
gcc, it expands into an inline asm. For icc, it turns into an intrinsic.
In fact, for a simple case like ld16 instruction, it is better to use
intrinsic (or gcc asm with appropriate clobber list) because using a
function call will pretty much destroy all high level optimization
around that call. Just imagine all of intermediate value stored in
scratch registers before the call all become void after the call. With
asm/intrinsic, the compiler has more knowledge to what's going on and
can do a better job at it.
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] IA64 trap code 16 bytes atomic copy on montecito
2006-10-31 5:55 [PATCH] IA64 trap code 16 bytes atomic copy on montecito bibo,mao
` (4 preceding siblings ...)
2006-10-31 8:40 ` Chen, Kenneth W
@ 2006-10-31 12:48 ` bibo,mao
5 siblings, 0 replies; 7+ messages in thread
From: bibo,mao @ 2006-10-31 12:48 UTC (permalink / raw)
To: linux-ia64
Now I add ia64_st16 macro in gcc_intrin.h, ia64_memcpy16 seems better.
As for icc intrinsics, I will check icc's header file to see whether
there exist st16/ld16 intrinsics.
thanks
bibo,mao
Chen, Kenneth W wrote:
> Mao, Bibo wrote on Tuesday, October 31, 2006 12:19 AM
>>> Also there is no need to resize the register stack frame here, since
>>> this is already a leaf function and there are plenty scratch register
>>> you can use before tap into register stack. I personally prefer not
>>> to do alloc instruction here.
>>>
>>> And I think it would be a lot easier if you implement an intrinsic
>>> function, like ia64_ld16/ia64_st16 and stick them in include/asm-ia64/
>>> gcc_intrin.h and intel_intrin.h.
>>>
>> but there will be inline asm in c language, it is not benefit for gcc to
>> optimization, I hear that IA64 hates inline asm.
>
> We hate style like:
>
> void foo()
> {
> int a;
>
> blah()
>
> asm("ld16 ..." :: "" ..."");
>
> bar();
> }
>
> Because this breaks all icc builds. It's perfectly fine to add an
> abstraction function that turns the above asm("") into ia64_ld16(). For
> gcc, it expands into an inline asm. For icc, it turns into an intrinsic.
>
> In fact, for a simple case like ld16 instruction, it is better to use
> intrinsic (or gcc asm with appropriate clobber list) because using a
> function call will pretty much destroy all high level optimization
> around that call. Just imagine all of intermediate value stored in
> scratch registers before the call all become void after the call. With
> asm/intrinsic, the compiler has more knowledge to what's going on and
> can do a better job at it.
>
>
>
diff -Nrup -X 2.6.19-rc2.org/Documentation/dontdiff 2.6.19-rc2.org/arch/ia64/kernel/kprobes.c 2.6.19-rc2/arch/ia64/kernel/kprobes.c
--- 2.6.19-rc2.org/arch/ia64/kernel/kprobes.c 2006-10-27 16:39:29.000000000 +0800
+++ 2.6.19-rc2/arch/ia64/kernel/kprobes.c 2006-10-31 20:19:38.000000000 +0800
@@ -39,6 +39,8 @@ extern void jprobe_inst_return(void);
DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
+#define ITANIUM_CPUID4_BIT_AO 2
+#define ITANIUM_CPUID4_AO (0x1UL << ITANIUM_CPUID4_BIT_AO)
enum instruction_type {A, I, M, F, B, L, X, u};
static enum instruction_type bundle_encoding[32][3] = {
@@ -284,6 +286,8 @@ static int __kprobes in_ivt_functions(un
static int __kprobes valid_kprobe_addr(int template, int slot,
unsigned long addr)
{
+ int atomic;
+
if ((slot > 2) || ((bundle_encoding[template][1] == L) && slot > 1)) {
printk(KERN_WARNING "Attempting to insert unaligned kprobe "
"at 0x%lx\n", addr);
@@ -296,7 +300,8 @@ static int __kprobes valid_kprobe_addr(i
return -EINVAL;
}
- if (slot == 1 && bundle_encoding[template][1] != L) {
+ atomic = local_cpu_data->features & ITANIUM_CPUID4_AO;
+ if (slot == 1 && !atomic && bundle_encoding[template][1] != L) {
printk(KERN_WARNING "Inserting kprobes on slot #1 "
"is not supported\n");
return -EINVAL;
@@ -448,6 +453,12 @@ int __kprobes arch_prepare_kprobe(struct
p->ainsn.insn = get_insn_slot();
if (!p->ainsn.insn)
return -ENOMEM;
+ if (unlikely(((unsigned long)&p->opcode & 0xF)
+ || ((unsigned long)p->ainsn.insn & 0xF))) {
+ printk(KERN_WARNING "Kprobes opcode 16-bytes unalignment\n ");
+ return -EINVAL;
+ }
+
memcpy(&p->opcode, kprobe_addr, sizeof(kprobe_opcode_t));
memcpy(p->ainsn.insn, kprobe_addr, sizeof(kprobe_opcode_t));
@@ -463,7 +474,10 @@ void __kprobes arch_arm_kprobe(struct kp
flush_icache_range((unsigned long)p->ainsn.insn,
(unsigned long)p->ainsn.insn + sizeof(kprobe_opcode_t));
- memcpy((char *)arm_addr, &p->opcode, sizeof(kprobe_opcode_t));
+ if (local_cpu_data->features & ITANIUM_CPUID4_AO)
+ ia64_st16((void *)arm_addr, (void *)&p->opcode);
+ else
+ memcpy((char *)arm_addr, &p->opcode, sizeof(kprobe_opcode_t));
flush_icache_range(arm_addr, arm_addr + sizeof(kprobe_opcode_t));
}
@@ -473,8 +487,11 @@ void __kprobes arch_disarm_kprobe(struct
unsigned long arm_addr = addr & ~0xFULL;
/* p->ainsn.insn contains the original unaltered kprobe_opcode_t */
- memcpy((char *) arm_addr, (char *) p->ainsn.insn,
- sizeof(kprobe_opcode_t));
+ if (local_cpu_data->features & ITANIUM_CPUID4_AO)
+ ia64_st16((void *)arm_addr, (void *) p->ainsn.insn);
+ else
+ memcpy((char *) arm_addr, (char *) p->ainsn.insn,
+ sizeof(kprobe_opcode_t));
flush_icache_range(arm_addr, arm_addr + sizeof(kprobe_opcode_t));
}
diff -Nrup -X 2.6.19-rc2.org/Documentation/dontdiff 2.6.19-rc2.org/include/asm-ia64/gcc_intrin.h 2.6.19-rc2/include/asm-ia64/gcc_intrin.h
--- 2.6.19-rc2.org/include/asm-ia64/gcc_intrin.h 2005-08-29 07:41:01.000000000 +0800
+++ 2.6.19-rc2/include/asm-ia64/gcc_intrin.h 2006-10-31 18:34:23.000000000 +0800
@@ -598,4 +598,12 @@ do { \
:: "r"((x)) : "p6", "p7", "memory"); \
} while (0)
+#define ia64_st16(dest, src) \
+do { \
+ unsigned long value; \
+ asm volatile(";; ld16 %0=[%2];;" \
+ " st16 [%1]=%0;;" \
+ :"=r"(value) \
+ :"r"(dest), "r"(src) : "memory"); \
+} while(0)
#endif /* _ASM_IA64_GCC_INTRIN_H */
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2006-10-31 12:48 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-10-31 5:55 [PATCH] IA64 trap code 16 bytes atomic copy on montecito bibo,mao
2006-10-31 6:18 ` Keith Owens
2006-10-31 7:53 ` Chen, Kenneth W
2006-10-31 8:09 ` Chen, Kenneth W
2006-10-31 8:19 ` bibo,mao
2006-10-31 8:40 ` Chen, Kenneth W
2006-10-31 12:48 ` bibo,mao
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox