public inbox for linux-ia64@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2
@ 2006-11-02  3:11 bibo,mao
  2006-11-02  3:39 ` Keith Owens
                   ` (19 more replies)
  0 siblings, 20 replies; 21+ messages in thread
From: bibo,mao @ 2006-11-02  3:11 UTC (permalink / raw)
  To: linux-ia64

hi,
 On IA64 kprobe can not insert trap code on slot 1 because
opcode of slot 1 crosses over two consecutive 8-bytes. On
montecito machine 16 bytes atomic operation is avaiable.
This patch adds ia64_ld16/ia64_st16 instrins in gcc header
file, implements atomic instr bundle updating by cpu feature.
 
Signed-off-by: bibo, mao <bibo.mao@intel.com>
 
thanks
bibo,mao 


diff -Nrup -X 2.6.19-rc2.org/Documentation/dontdiff 2.6.19-rc2.org/arch/ia64/kernel/kprobes.c 2.6.19-rc2/arch/ia64/kernel/kprobes.c
--- 2.6.19-rc2.org/arch/ia64/kernel/kprobes.c	2006-10-27 16:39:29.000000000 +0800
+++ 2.6.19-rc2/arch/ia64/kernel/kprobes.c	2006-11-01 19:05:18.000000000 +0800
@@ -296,7 +296,7 @@ static int __kprobes valid_kprobe_addr(i
		return -EINVAL;
	}

-	if (slot = 1 && bundle_encoding[template][1] != L) {
+	if (slot = 1 && bundle_encoding[template][1] != L && !ATOMIC_UPDATE) {
		printk(KERN_WARNING "Inserting kprobes on slot #1 "
		       "is not supported\n");
		return -EINVAL;
@@ -448,6 +448,12 @@ int __kprobes arch_prepare_kprobe(struct
	p->ainsn.insn = get_insn_slot();
	if (!p->ainsn.insn)
		return -ENOMEM;
+	if (unlikely(((unsigned long)&p->opcode & 0xF)
+				|| ((unsigned long)p->ainsn.insn & 0xF))) {
+		printk(KERN_WARNING "Kprobes opcode 16-bytes unalignment\n ");
+		return -EINVAL;
+	}
+
	memcpy(&p->opcode, kprobe_addr, sizeof(kprobe_opcode_t));
	memcpy(p->ainsn.insn, kprobe_addr, sizeof(kprobe_opcode_t));

@@ -463,7 +469,10 @@ void __kprobes arch_arm_kprobe(struct kp

	flush_icache_range((unsigned long)p->ainsn.insn,
			(unsigned long)p->ainsn.insn + sizeof(kprobe_opcode_t));
-	memcpy((char *)arm_addr, &p->opcode, sizeof(kprobe_opcode_t));
+	if (ATOMIC_UPDATE)
+		kprobe_update_bundle((void *)arm_addr, (void *)&p->opcode);
+	else
+		memcpy((char *)arm_addr, &p->opcode, sizeof(kprobe_opcode_t));
	flush_icache_range(arm_addr, arm_addr + sizeof(kprobe_opcode_t));
}

@@ -473,8 +482,11 @@ void __kprobes arch_disarm_kprobe(struct
	unsigned long arm_addr = addr & ~0xFULL;

	/* p->ainsn.insn contains the original unaltered kprobe_opcode_t */
-	memcpy((char *) arm_addr, (char *) p->ainsn.insn,
-					 sizeof(kprobe_opcode_t));
+	if (ATOMIC_UPDATE)
+		kprobe_update_bundle((void *)arm_addr, (void *) p->ainsn.insn);
+	else
+		memcpy((char *) arm_addr, (char *) p->ainsn.insn,
+					sizeof(kprobe_opcode_t));
	flush_icache_range(arm_addr, arm_addr + sizeof(kprobe_opcode_t));
}

diff -Nrup -X 2.6.19-rc2.org/Documentation/dontdiff 2.6.19-rc2.org/include/asm-ia64/gcc_intrin.h 2.6.19-rc2/include/asm-ia64/gcc_intrin.h
--- 2.6.19-rc2.org/include/asm-ia64/gcc_intrin.h	2005-08-29 07:41:01.000000000 +0800
+++ 2.6.19-rc2/include/asm-ia64/gcc_intrin.h	2006-11-01 18:31:36.000000000 +0800
@@ -598,4 +598,8 @@ do {								\
		      :: "r"((x)) : "p6", "p7", "memory");	\
} while (0)

+#define ia64_ld16(low, addr)						\
+	asm volatile(";;ld16 %0=[%1];;":"=r"(low):"r"(addr):"memory")
+#define ia64_st16(low, addr)						\
+	asm volatile(";;st16 [%1]=%0;;"::"r"(low),"r"(addr):"memory")
#endif /* _ASM_IA64_GCC_INTRIN_H */
diff -Nrup -X 2.6.19-rc2.org/Documentation/dontdiff 2.6.19-rc2.org/include/asm-ia64/intel_intrin.h 2.6.19-rc2/include/asm-ia64/intel_intrin.h
--- 2.6.19-rc2.org/include/asm-ia64/intel_intrin.h	2006-07-24 10:47:13.000000000 +0800
+++ 2.6.19-rc2/include/asm-ia64/intel_intrin.h	2006-11-01 18:38:13.000000000 +0800
@@ -152,6 +152,10 @@ do {							\
	}						\
} while (0)

+#define ia64_st16(low, addr)    __st16(__sttype_none, __sthint_none, addr, low)
+#define ia64_ld16(low, addr)					\
+  	low =  __ld16(__ldtype_none, __ldtype_none, addr)
+
#define __builtin_trap()	__break(0);

#endif /* _ASM_IA64_INTEL_INTRIN_H */
diff -Nrup -X 2.6.19-rc2.org/Documentation/dontdiff 2.6.19-rc2.org/include/asm-ia64/kprobes.h 2.6.19-rc2/include/asm-ia64/kprobes.h
--- 2.6.19-rc2.org/include/asm-ia64/kprobes.h	2006-10-27 16:39:34.000000000 +0800
+++ 2.6.19-rc2/include/asm-ia64/kprobes.h	2006-11-01 19:08:04.000000000 +0800
@@ -88,6 +88,7 @@ struct kprobe_ctlblk {
#define SLOT0_OPCODE_SHIFT	(37)
#define SLOT1_p1_OPCODE_SHIFT	(37 - (64-46))
#define SLOT2_OPCODE_SHIFT 	(37)
+#define ATOMIC_UPDATE		(local_cpu_data->features & ITANIUM_CPUID4_AO)

#define INDIRECT_CALL_OPCODE		(1)
#define IP_RELATIVE_CALL_OPCODE		(5)
@@ -96,6 +97,12 @@ struct kprobe_ctlblk {
#define LONG_BRANCH_OPCODE		(0xC)
#define LONG_CALL_OPCODE		(0xD)
#define flush_insn_slot(p)		do { } while (0)
+#define kprobe_update_bundle(dest, src)		\
+do {						\
+	unsigned long low;			\
+	ia64_ld16(low, src);			\
+	ia64_st16(low, dest);			\
+} while (0)

typedef struct kprobe_opcode {
	bundle_t bundle;
diff -Nrup -X 2.6.19-rc2.org/Documentation/dontdiff 2.6.19-rc2.org/include/asm-ia64/kregs.h 2.6.19-rc2/include/asm-ia64/kregs.h
--- 2.6.19-rc2.org/include/asm-ia64/kregs.h	2005-08-29 07:41:01.000000000 +0800
+++ 2.6.19-rc2/include/asm-ia64/kregs.h	2006-11-01 18:54:37.000000000 +0800
@@ -160,4 +160,7 @@
#define IA64_ISR_CODE_LFETCH	4
#define IA64_ISR_CODE_PROBEF	5

+/* CPUID 4 Register */
+#define ITANIUM_CPUID4_AO_BIT	2
+#define ITANIUM_CPUID4_AO	(__IA64_UL(1) << ITANIUM_CPUID4_AO_BIT)
#endif /* _ASM_IA64_kREGS_H */

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2
  2006-11-02  3:11 [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2 bibo,mao
@ 2006-11-02  3:39 ` Keith Owens
  2006-11-02  5:04 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take bibo,mao
                   ` (18 subsequent siblings)
  19 siblings, 0 replies; 21+ messages in thread
From: Keith Owens @ 2006-11-02  3:39 UTC (permalink / raw)
  To: linux-ia64

"bibo,mao" (on Thu, 02 Nov 2006 11:11:42 +0800) wrote:
>+#define ia64_ld16(low, addr)						\
>+	asm volatile(";;ld16 %0=[%1];;":"=r"(low):"r"(addr):"memory")
>+#define ia64_st16(low, addr)						\
>+	asm volatile(";;st16 [%1]=%0;;"::"r"(low),"r"(addr):"memory")
>...
>+#define ia64_st16(low, addr)    __st16(__sttype_none, __sthint_none, addr, low)
>+#define ia64_ld16(low, addr)					\
>+  	low =  __ld16(__ldtype_none, __ldtype_none, addr)
>+

ld16 clobbers ar.csd, that needs to be added to the definition of
ia64_ld16.

ia64_ld16 does not need a memory clobber.

Strictly speaking, ia64_st16 does not need a memory clobber.  addr
should be a write operand (not read as you have it) and gcc should see
that addr is clobbered.  However we clobber 16 bytes starting at addr
and I suspect that gcc has no way of telling about the second set of 8
bytes.  In this case, we may have to stick with a memory clobber on
ia64_st16.

>+#define kprobe_update_bundle(dest, src)		\
>+do {						\
>+	unsigned long low;			\
>+	ia64_ld16(low, src);			\
>+	ia64_st16(low, dest);			\
>+} while (0)

Using unsigned long (8 bytes) is misleading for a 16 byte operation.
Not sure what we can do about that.


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take
  2006-11-02  3:11 [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2 bibo,mao
  2006-11-02  3:39 ` Keith Owens
@ 2006-11-02  5:04 ` bibo,mao
  2006-11-02  6:51 ` bibo,mao
                   ` (17 subsequent siblings)
  19 siblings, 0 replies; 21+ messages in thread
From: bibo,mao @ 2006-11-02  5:04 UTC (permalink / raw)
  To: linux-ia64

Hi Keith,
  thanks for reviewing my patch, I reply inline.

thanks
bibo,mao

Keith Owens wrote:
> "bibo,mao" (on Thu, 02 Nov 2006 11:11:42 +0800) wrote:
>> +#define ia64_ld16(low, addr)						\
>> +	asm volatile(";;ld16 %0=[%1];;":"=r"(low):"r"(addr):"memory")
>> +#define ia64_st16(low, addr)						\
>> +	asm volatile(";;st16 [%1]=%0;;"::"r"(low),"r"(addr):"memory")
>> ...
>> +#define ia64_st16(low, addr)    __st16(__sttype_none, __sthint_none, addr, low)
>> +#define ia64_ld16(low, addr)					\
>> +  	low =  __ld16(__ldtype_none, __ldtype_none, addr)
>> +
> 
> ld16 clobbers ar.csd, that needs to be added to the definition of
> ia64_ld16.
Here I should add some annotation how ld16/st16 works

> 
> ia64_ld16 does not need a memory clobber.
ok, I will delete memory clobber for ia64_ld16.

> 
> Strictly speaking, ia64_st16 does not need a memory clobber.  addr
> should be a write operand (not read as you have it) and gcc should see
> that addr is clobbered.  However we clobber 16 bytes starting at addr
> and I suspect that gcc has no way of telling about the second set of 8
> bytes.  In this case, we may have to stick with a memory clobber on
> ia64_st16.
> 
>> +#define kprobe_update_bundle(dest, src)		\
>> +do {						\
>> +	unsigned long low;			\
>> +	ia64_ld16(low, src);			\
>> +	ia64_st16(low, dest);			\
>> +} while (0)
> 
> Using unsigned long (8 bytes) is misleading for a 16 byte operation.
> Not sure what we can do about that.
more annotation should be suitable, I will add annotation for this macro.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take
  2006-11-02  3:11 [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2 bibo,mao
  2006-11-02  3:39 ` Keith Owens
  2006-11-02  5:04 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take bibo,mao
@ 2006-11-02  6:51 ` bibo,mao
  2006-11-02  7:17 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2 Keith Owens
                   ` (16 subsequent siblings)
  19 siblings, 0 replies; 21+ messages in thread
From: bibo,mao @ 2006-11-02  6:51 UTC (permalink / raw)
  To: linux-ia64

Keith Owens wrote:
> "bibo,mao" (on Thu, 02 Nov 2006 11:11:42 +0800) wrote:
>> +#define ia64_ld16(low, addr)						\
>> +	asm volatile(";;ld16 %0=[%1];;":"=r"(low):"r"(addr):"memory")
>> +#define ia64_st16(low, addr)						\
>> +	asm volatile(";;st16 [%1]=%0;;"::"r"(low),"r"(addr):"memory")
>> ...
>> +#define ia64_st16(low, addr)    __st16(__sttype_none, __sthint_none, addr, low)
>> +#define ia64_ld16(low, addr)					\
>> +  	low =  __ld16(__ldtype_none, __ldtype_none, addr)
>> +
> 
> ld16 clobbers ar.csd, that needs to be added to the definition of
> ia64_ld16.
It seems that gcc does not support inline asm clobber for ar.csd register, 
so here I leave clobber register as empty.
> 
> ia64_ld16 does not need a memory clobber.
memory clobber is removed here.
> 
> Strictly speaking, ia64_st16 does not need a memory clobber.  addr
> should be a write operand (not read as you have it) and gcc should see
> that addr is clobbered.  However we clobber 16 bytes starting at addr
> and I suspect that gcc has no way of telling about the second set of 8
> bytes.  In this case, we may have to stick with a memory clobber on
> ia64_st16.
> 
>> +#define kprobe_update_bundle(dest, src)		\
>> +do {						\
>> +	unsigned long low;			\
>> +	ia64_ld16(low, src);			\
>> +	ia64_st16(low, dest);			\
>> +} while (0)
> 
> Using unsigned long (8 bytes) is misleading for a 16 byte operation.
> Not sure what we can do about that.

-------------------------------------------------------------------
diff -Nrup -X 2.6.19-rc2.org/Documentation/dontdiff 2.6.19-rc2.org/arch/ia64/kernel/kprobes.c 2.6.19-rc2/arch/ia64/kernel/kprobes.c
--- 2.6.19-rc2.org/arch/ia64/kernel/kprobes.c	2006-10-27 16:39:29.000000000 +0800
+++ 2.6.19-rc2/arch/ia64/kernel/kprobes.c	2006-11-01 19:05:18.000000000 +0800
@@ -296,7 +296,7 @@ static int __kprobes valid_kprobe_addr(i
 		return -EINVAL;
 	}
 
-	if (slot = 1 && bundle_encoding[template][1] != L) {
+	if (slot = 1 && bundle_encoding[template][1] != L && !ATOMIC_UPDATE) {
 		printk(KERN_WARNING "Inserting kprobes on slot #1 "
 		       "is not supported\n");
 		return -EINVAL;
@@ -448,6 +448,12 @@ int __kprobes arch_prepare_kprobe(struct
 	p->ainsn.insn = get_insn_slot();
 	if (!p->ainsn.insn)
 		return -ENOMEM;
+	if (unlikely(((unsigned long)&p->opcode & 0xF)
+				|| ((unsigned long)p->ainsn.insn & 0xF))) {
+		printk(KERN_WARNING "Kprobes opcode 16-bytes unalignment\n ");
+		return -EINVAL;
+	}
+
 	memcpy(&p->opcode, kprobe_addr, sizeof(kprobe_opcode_t));
 	memcpy(p->ainsn.insn, kprobe_addr, sizeof(kprobe_opcode_t));
 
@@ -463,7 +469,10 @@ void __kprobes arch_arm_kprobe(struct kp
 
 	flush_icache_range((unsigned long)p->ainsn.insn,
 			(unsigned long)p->ainsn.insn + sizeof(kprobe_opcode_t));
-	memcpy((char *)arm_addr, &p->opcode, sizeof(kprobe_opcode_t));
+	if (ATOMIC_UPDATE)
+		kprobe_update_bundle((void *)arm_addr, (void *)&p->opcode);
+	else
+		memcpy((char *)arm_addr, &p->opcode, sizeof(kprobe_opcode_t));
 	flush_icache_range(arm_addr, arm_addr + sizeof(kprobe_opcode_t));
 }
 
@@ -473,8 +482,11 @@ void __kprobes arch_disarm_kprobe(struct
 	unsigned long arm_addr = addr & ~0xFULL;
 
 	/* p->ainsn.insn contains the original unaltered kprobe_opcode_t */
-	memcpy((char *) arm_addr, (char *) p->ainsn.insn,
-					 sizeof(kprobe_opcode_t));
+	if (ATOMIC_UPDATE)
+		kprobe_update_bundle((void *)arm_addr, (void *) p->ainsn.insn);
+	else
+		memcpy((char *) arm_addr, (char *) p->ainsn.insn,
+					sizeof(kprobe_opcode_t));
 	flush_icache_range(arm_addr, arm_addr + sizeof(kprobe_opcode_t));
 }
 
diff -Nrup -X 2.6.19-rc2.org/Documentation/dontdiff 2.6.19-rc2.org/include/asm-ia64/gcc_intrin.h 2.6.19-rc2/include/asm-ia64/gcc_intrin.h
--- 2.6.19-rc2.org/include/asm-ia64/gcc_intrin.h	2005-08-29 07:41:01.000000000 +0800
+++ 2.6.19-rc2/include/asm-ia64/gcc_intrin.h	2006-11-02 15:28:41.000000000 +0800
@@ -598,4 +598,23 @@ do {								\
 		      :: "r"((x)) : "p6", "p7", "memory");	\
 } while (0)
 
+/* ld16/st16 instruction will fault if issued to UC, UCE, or WC memory.
+ * An aligned ld16/st16 instruction is performed as an atomic 16-byte
+ * memory reference. For these instructions, the address specified must
+ * be 16-byte aligned.
+ * ia64_ld16 macro will load the lower 64 bits to low parameter, and higher
+ * 64 bits value is put to ar.csd register. In order to get the 128 bits
+ * value, this can be used:
+ *	ia64_ld16(low, addr);
+ *	high = ia64_getreg(_IA64_REG_AR_CSD); 
+ * ia64_st16 macro will store low parameter value to lower 64 bits, and
+ * ar.csd register to higher 64 bits. In order to store 128 bits value
+ * this can be used:
+ *	ia64_setreg(_IA64_REG_AR_CSD, high);
+ *	ia64_st16(low, addr);
+ */
+#define ia64_ld16(low, addr)						\
+	asm volatile(";;ld16 %0, ar.csd =[%1];; ":"=r"(low): "r"(addr))
+#define ia64_st16(low, addr)						\
+	asm volatile(";;st16 [%1]=%0, ar.csd;; "::"r"(low),"r"(addr):"memory")
 #endif /* _ASM_IA64_GCC_INTRIN_H */
diff -Nrup -X 2.6.19-rc2.org/Documentation/dontdiff 2.6.19-rc2.org/include/asm-ia64/intel_intrin.h 2.6.19-rc2/include/asm-ia64/intel_intrin.h
--- 2.6.19-rc2.org/include/asm-ia64/intel_intrin.h	2006-07-24 10:47:13.000000000 +0800
+++ 2.6.19-rc2/include/asm-ia64/intel_intrin.h	2006-11-01 18:38:13.000000000 +0800
@@ -152,6 +152,10 @@ do {							\
 	}						\
 } while (0)
 
+#define ia64_st16(low, addr)    __st16(__sttype_none, __sthint_none, addr, low)
+#define ia64_ld16(low, addr)					\
+  	low =  __ld16(__ldtype_none, __ldtype_none, addr)
+
 #define __builtin_trap()	__break(0);
 
 #endif /* _ASM_IA64_INTEL_INTRIN_H */
diff -Nrup -X 2.6.19-rc2.org/Documentation/dontdiff 2.6.19-rc2.org/include/asm-ia64/kprobes.h 2.6.19-rc2/include/asm-ia64/kprobes.h
--- 2.6.19-rc2.org/include/asm-ia64/kprobes.h	2006-10-27 16:39:34.000000000 +0800
+++ 2.6.19-rc2/include/asm-ia64/kprobes.h	2006-11-02 15:20:54.000000000 +0800
@@ -88,6 +88,7 @@ struct kprobe_ctlblk {
 #define SLOT0_OPCODE_SHIFT	(37)
 #define SLOT1_p1_OPCODE_SHIFT	(37 - (64-46))
 #define SLOT2_OPCODE_SHIFT 	(37)
+#define ATOMIC_UPDATE		(local_cpu_data->features & ITANIUM_CPUID4_AO)
 
 #define INDIRECT_CALL_OPCODE		(1)
 #define IP_RELATIVE_CALL_OPCODE		(5)
@@ -96,6 +97,17 @@ struct kprobe_ctlblk {
 #define LONG_BRANCH_OPCODE		(0xC)
 #define LONG_CALL_OPCODE		(0xD)
 #define flush_insn_slot(p)		do { } while (0)
+/* this macro will first get 128 bits value from src address,
+ * and then copy this 128 bits value to dest address. It uses
+ * ar.csd register to load and store higher 64 bits value,
+ * and low 64 bits value is passed by low variant.
+ */ 
+#define kprobe_update_bundle(dest, src)		\
+do {						\
+	unsigned long low;			\
+	ia64_ld16(low, src);			\
+	ia64_st16(low, dest);			\
+} while (0)
 
 typedef struct kprobe_opcode {
 	bundle_t bundle;
diff -Nrup -X 2.6.19-rc2.org/Documentation/dontdiff 2.6.19-rc2.org/include/asm-ia64/kregs.h 2.6.19-rc2/include/asm-ia64/kregs.h
--- 2.6.19-rc2.org/include/asm-ia64/kregs.h	2005-08-29 07:41:01.000000000 +0800
+++ 2.6.19-rc2/include/asm-ia64/kregs.h	2006-11-01 18:54:37.000000000 +0800
@@ -160,4 +160,7 @@
 #define IA64_ISR_CODE_LFETCH	4
 #define IA64_ISR_CODE_PROBEF	5
 
+/* CPUID 4 Register */
+#define ITANIUM_CPUID4_AO_BIT	2
+#define ITANIUM_CPUID4_AO	(__IA64_UL(1) << ITANIUM_CPUID4_AO_BIT)
 #endif /* _ASM_IA64_kREGS_H */

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2
  2006-11-02  3:11 [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2 bibo,mao
                   ` (2 preceding siblings ...)
  2006-11-02  6:51 ` bibo,mao
@ 2006-11-02  7:17 ` Keith Owens
  2006-11-02  7:22 ` Keith Owens
                   ` (15 subsequent siblings)
  19 siblings, 0 replies; 21+ messages in thread
From: Keith Owens @ 2006-11-02  7:17 UTC (permalink / raw)
  To: linux-ia64

"bibo,mao" (on Thu, 02 Nov 2006 14:51:18 +0800) wrote:
>Keith Owens wrote:
>> "bibo,mao" (on Thu, 02 Nov 2006 11:11:42 +0800) wrote:
>>> +#define ia64_ld16(low, addr)						\
>>> +	asm volatile(";;ld16 %0=[%1];;":"=r"(low):"r"(addr):"memory")
>>> +#define ia64_st16(low, addr)						\
>>> +	asm volatile(";;st16 [%1]=%0;;"::"r"(low),"r"(addr):"memory")
>>> ...
>>> +#define ia64_st16(low, addr)    __st16(__sttype_none, __sthint_none, addr, low)
>>> +#define ia64_ld16(low, addr)					\
>>> +  	low =  __ld16(__ldtype_none, __ldtype_none, addr)
>>> +
>> 
>> ld16 clobbers ar.csd, that needs to be added to the definition of
>> ia64_ld16.
>It seems that gcc does not support inline asm clobber for ar.csd register, 
>so here I leave clobber register as empty.

In that case I strongly recommend against using inline asm for ld16 and
st16.  If we cannot tell gcc what the side effects are then inline asm
is not safe.  Use a small assembler function instead, and accept that
we are calling an external function which stops gcc from optimizing
register usage in the C code.

Using a separate assembler function also gets around a dodgy bit of C
code.  Strictly speaking, you should not rely on running two inline asm
statements together, i.e. ia64_ld16() followed by ia64_st16() is not
guaranteed to work.  From 'info gcc':

  "Similarly, you can't expect a sequence of volatile `asm'
  instructions to remain perfectly consecutive.  If you want
  consecutive output, use a single `asm'".

This should do the job.  No prologue or alloc because it is a leaf
function, which means no in0/in1, use r32/r33 instead.

extern ld16_st16(src, dest);

ENTRY(ld16_st16)
	ld16 r15=[r33]		// dest
	;;
	st16 [r32]=r15		// src
	br.ret.sptk.many rp
EXIT(ld16_st16)


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2
  2006-11-02  3:11 [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2 bibo,mao
                   ` (3 preceding siblings ...)
  2006-11-02  7:17 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2 Keith Owens
@ 2006-11-02  7:22 ` Keith Owens
  2006-11-02  7:25 ` Keith Owens
                   ` (14 subsequent siblings)
  19 siblings, 0 replies; 21+ messages in thread
From: Keith Owens @ 2006-11-02  7:22 UTC (permalink / raw)
  To: linux-ia64

Keith Owens (on Thu, 02 Nov 2006 18:17:57 +1100) wrote:
>This should do the job.  No prologue or alloc because it is a leaf
>function, which means no in0/in1, use r32/r33 instead.
>
>extern ld16_st16(src, dest);
>
>ENTRY(ld16_st16)
>	ld16 r15=[r33]		// dest
>	;;
>	st16 [r32]=r15		// src
>	br.ret.sptk.many rp
>EXIT(ld16_st16)

/me kicks himself for mixing up src and dest.

extern ld16_st16(dest, src);

ENTRY(ld16_st16)
	ld16 r15=[r32]		// src
	;;
	st16 [r33]=r15		// dest
	br.ret.sptk.many rp
EXIT(ld16_st16)


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2
  2006-11-02  3:11 [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2 bibo,mao
                   ` (4 preceding siblings ...)
  2006-11-02  7:22 ` Keith Owens
@ 2006-11-02  7:25 ` Keith Owens
  2006-11-02  7:27 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take bibo,mao
                   ` (13 subsequent siblings)
  19 siblings, 0 replies; 21+ messages in thread
From: Keith Owens @ 2006-11-02  7:25 UTC (permalink / raw)
  To: linux-ia64

Keith Owens (on Thu, 02 Nov 2006 18:22:58 +1100) wrote:
>/me kicks himself for mixing up src and dest.
>
>extern ld16_st16(dest, src);
>
>ENTRY(ld16_st16)
>	ld16 r15=[r32]		// src
>	;;
>	st16 [r33]=r15		// dest
>	br.ret.sptk.many rp
>EXIT(ld16_st16)

Third time lucky, then it's time for some sleep.

extern ld16_st16(dest, src);

ENTRY(ld16_st16)
	ld16 r15=[r33]		// src
	;;
	st16 [r32]=r15		// dest
	br.ret.sptk.many rp
EXIT(ld16_st16)


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take
  2006-11-02  3:11 [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2 bibo,mao
                   ` (5 preceding siblings ...)
  2006-11-02  7:25 ` Keith Owens
@ 2006-11-02  7:27 ` bibo,mao
  2006-11-02  7:32 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2 Keith Owens
                   ` (12 subsequent siblings)
  19 siblings, 0 replies; 21+ messages in thread
From: bibo,mao @ 2006-11-02  7:27 UTC (permalink / raw)
  To: linux-ia64

Keith Owens wrote:
> Keith Owens (on Thu, 02 Nov 2006 18:22:58 +1100) wrote:
>> /me kicks himself for mixing up src and dest.
>>
>> extern ld16_st16(dest, src);
>>
>> ENTRY(ld16_st16)
>> 	ld16 r15=[r32]		// src
>> 	;;
>> 	st16 [r33]=r15		// dest
>> 	br.ret.sptk.many rp
>> EXIT(ld16_st16)
> 
> Third time lucky, then it's time for some sleep.
Thanks:), I do not whether ar.csd should be saved/restore across
this function.
> 
> extern ld16_st16(dest, src);
> 
> ENTRY(ld16_st16)
> 	ld16 r15=[r33]		// src
> 	;;
> 	st16 [r32]=r15		// dest
> 	br.ret.sptk.many rp
> EXIT(ld16_st16)

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2
  2006-11-02  3:11 [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2 bibo,mao
                   ` (6 preceding siblings ...)
  2006-11-02  7:27 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take bibo,mao
@ 2006-11-02  7:32 ` Keith Owens
  2006-11-02  7:38 ` Chen, Kenneth W
                   ` (11 subsequent siblings)
  19 siblings, 0 replies; 21+ messages in thread
From: Keith Owens @ 2006-11-02  7:32 UTC (permalink / raw)
  To: linux-ia64

"bibo,mao" (on Thu, 02 Nov 2006 15:27:34 +0800) wrote:
>Keith Owens wrote:
>> Keith Owens (on Thu, 02 Nov 2006 18:22:58 +1100) wrote:
>>> /me kicks himself for mixing up src and dest.
>>>
>>> extern ld16_st16(dest, src);
>>>
>>> ENTRY(ld16_st16)
>>> 	ld16 r15=[r32]		// src
>>> 	;;
>>> 	st16 [r33]=r15		// dest
>>> 	br.ret.sptk.many rp
>>> EXIT(ld16_st16)
>> 
>> Third time lucky, then it's time for some sleep.
>Thanks:), I do not whether ar.csd should be saved/restore across
>this function.

ar.csd is defined to be a scratch register.  So its contents do not
need to be saved.

>> extern ld16_st16(dest, src);
>> 
>> ENTRY(ld16_st16)
>> 	ld16 r15=[r33]		// src
>> 	;;
>> 	st16 [r32]=r15		// dest
>> 	br.ret.sptk.many rp
>> EXIT(ld16_st16)
>
>


^ permalink raw reply	[flat|nested] 21+ messages in thread

* RE: [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2
  2006-11-02  3:11 [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2 bibo,mao
                   ` (7 preceding siblings ...)
  2006-11-02  7:32 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2 Keith Owens
@ 2006-11-02  7:38 ` Chen, Kenneth W
  2006-11-02  7:45 ` Chen, Kenneth W
                   ` (10 subsequent siblings)
  19 siblings, 0 replies; 21+ messages in thread
From: Chen, Kenneth W @ 2006-11-02  7:38 UTC (permalink / raw)
  To: linux-ia64

Mao, Bibo wrote on Wednesday, November 01, 2006 7:12 PM
>  On IA64 kprobe can not insert trap code on slot 1 because
> opcode of slot 1 crosses over two consecutive 8-bytes. On
> montecito machine 16 bytes atomic operation is avaiable.
> This patch adds ia64_ld16/ia64_st16 instrins in gcc header
> file, implements atomic instr bundle updating by cpu feature.

Let me take a giant step back and ask you a question:

Have you explored all possible avenue of inserting a probe with
only 8-byte write to slot 1 instruction?  I think it is doable.

- Ken

^ permalink raw reply	[flat|nested] 21+ messages in thread

* RE: [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2
  2006-11-02  3:11 [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2 bibo,mao
                   ` (8 preceding siblings ...)
  2006-11-02  7:38 ` Chen, Kenneth W
@ 2006-11-02  7:45 ` Chen, Kenneth W
  2006-11-02  7:52 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take bibo,mao
                   ` (9 subsequent siblings)
  19 siblings, 0 replies; 21+ messages in thread
From: Chen, Kenneth W @ 2006-11-02  7:45 UTC (permalink / raw)
  To: linux-ia64

Mao, Bibo wrote on Wednesday, November 01, 2006 7:12 PM
> @@ -463,7 +469,10 @@ void __kprobes arch_arm_kprobe(struct kp
> 
> 	flush_icache_range((unsigned long)p->ainsn.insn,
> 			(unsigned long)p->ainsn.insn + sizeof(kprobe_opcode_t));
> -	memcpy((char *)arm_addr, &p->opcode, sizeof(kprobe_opcode_t));
> +	if (ATOMIC_UPDATE)
> +		kprobe_update_bundle((void *)arm_addr, (void *)&p->opcode);
> +	else
> +		memcpy((char *)arm_addr, &p->opcode, sizeof(kprobe_opcode_t));
> 	flush_icache_range(arm_addr, arm_addr + sizeof(kprobe_opcode_t));
> }

Now comments on the code: why memcpy in the else statement?  In the earlier
part of the patch, you already reject kprobe address on slot 1 if CPU doesn't
have 16-byte memory operation.  Why do you allow memcpy here? Will the "else"
condition ever be executed?

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take
  2006-11-02  3:11 [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2 bibo,mao
                   ` (9 preceding siblings ...)
  2006-11-02  7:45 ` Chen, Kenneth W
@ 2006-11-02  7:52 ` bibo,mao
  2006-11-02  8:17 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2 Chen, Kenneth W
                   ` (8 subsequent siblings)
  19 siblings, 0 replies; 21+ messages in thread
From: bibo,mao @ 2006-11-02  7:52 UTC (permalink / raw)
  To: linux-ia64

Chen, Kenneth W wrote:
> Mao, Bibo wrote on Wednesday, November 01, 2006 7:12 PM
>> @@ -463,7 +469,10 @@ void __kprobes arch_arm_kprobe(struct kp
>>
>> 	flush_icache_range((unsigned long)p->ainsn.insn,
>> 			(unsigned long)p->ainsn.insn + sizeof(kprobe_opcode_t));
>> -	memcpy((char *)arm_addr, &p->opcode, sizeof(kprobe_opcode_t));
>> +	if (ATOMIC_UPDATE)
>> +		kprobe_update_bundle((void *)arm_addr, (void *)&p->opcode);
>> +	else
>> +		memcpy((char *)arm_addr, &p->opcode, sizeof(kprobe_opcode_t));
>> 	flush_icache_range(arm_addr, arm_addr + sizeof(kprobe_opcode_t));
>> }
> 
> Now comments on the code: why memcpy in the else statement?  In the earlier
> part of the patch, you already reject kprobe address on slot 1 if CPU doesn't
> have 16-byte memory operation.  Why do you allow memcpy here? Will the "else"
> condition ever be executed?
> 
else means that current cpu does not support 16 byte atomic operation. If kprobe
address is on slot 0/2, then memcpy still can execute.

thanks
bibo,mao 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* RE: [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2
  2006-11-02  3:11 [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2 bibo,mao
                   ` (10 preceding siblings ...)
  2006-11-02  7:52 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take bibo,mao
@ 2006-11-02  8:17 ` Chen, Kenneth W
  2006-11-02  8:56 ` Chen, Kenneth W
                   ` (7 subsequent siblings)
  19 siblings, 0 replies; 21+ messages in thread
From: Chen, Kenneth W @ 2006-11-02  8:17 UTC (permalink / raw)
  To: linux-ia64

Mao, Bibo wrote on Wednesday, November 01, 2006 11:53 PM
> > Now comments on the code: why memcpy in the else statement?  In the earlier
> > part of the patch, you already reject kprobe address on slot 1 if CPU doesn't
> > have 16-byte memory operation.  Why do you allow memcpy here? Will the "else"
> > condition ever be executed?
> > 
> 
> else means that current cpu does not support 16 byte atomic operation. If kprobe
> address is on slot 0/2, then memcpy still can execute.


I would expect kprobe only writes 8 bytes on the slot that it is patching,
so either lower 8 bytes or upper 8 bytes depends on whether it is slot 0 or
2.  On the other hand, copying the whole 16 bytes bundle won't do any harm
anyway because it will overwrite with the same content.  OK I see that now.

But seriously, considering patch slot 1 instruction with bits slot1[40:18]
(which is nicely contained within the upper 8-byte of a bundle). The encoding
for break instruction takes [40:27], and it left you with 9 bits to encode
immediate value (actually 10 because bit 36 is also part of immediate value).
With that, kprobe on slot1 can be extended to all CPU, not just montecito.

Care to code that up ;-) ??


^ permalink raw reply	[flat|nested] 21+ messages in thread

* RE: [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2
  2006-11-02  3:11 [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2 bibo,mao
                   ` (11 preceding siblings ...)
  2006-11-02  8:17 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2 Chen, Kenneth W
@ 2006-11-02  8:56 ` Chen, Kenneth W
  2006-11-02  9:05 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take bibo,mao
                   ` (6 subsequent siblings)
  19 siblings, 0 replies; 21+ messages in thread
From: Chen, Kenneth W @ 2006-11-02  8:56 UTC (permalink / raw)
  To: linux-ia64

Mao, Bibo wrote on Wednesday, November 01, 2006 11:53 PM
> else means that current cpu does not support 16 byte atomic operation.
> If kprobe address is on slot 0/2, then memcpy still can execute.

Do you allow kprobe insertion on "L+X" instruction?  It looks like so.
Then how do you maintain atomic update to L and X instruction right
now with memcpy?

"atomic update" is not really the exact word I want to use here.  Don't
you want to write the upper 8-byte first so that break opcode is jammed
in there before updating the remaining 41-bit immediate value? Otherwise,
writing lower 8-byte first will end up with a small window that original
opcode seeing corrupted immediate value.


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take
  2006-11-02  3:11 [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2 bibo,mao
                   ` (12 preceding siblings ...)
  2006-11-02  8:56 ` Chen, Kenneth W
@ 2006-11-02  9:05 ` bibo,mao
  2006-11-02  9:22 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2 Chen, Kenneth W
                   ` (5 subsequent siblings)
  19 siblings, 0 replies; 21+ messages in thread
From: bibo,mao @ 2006-11-02  9:05 UTC (permalink / raw)
  To: linux-ia64

Chen, Kenneth W wrote:
> Mao, Bibo wrote on Wednesday, November 01, 2006 11:53 PM
>> else means that current cpu does not support 16 byte atomic operation.
>> If kprobe address is on slot 0/2, then memcpy still can execute.
> 
> Do you allow kprobe insertion on "L+X" instruction?  It looks like so.
> Then how do you maintain atomic update to L and X instruction right
> now with memcpy?
From the code, It allows kprobes insertion on "L+X" instruction, and put
BREAK code on the slot 2. IA64 manual says that "the X slot may encode
break.i and nop.i in addition to any X-unit instruction".
It seems that for MLX, BREAK opcode should insert the higher 8 bytes firstly.
> 
> "atomic update" is not really the exact word I want to use here.  Don't
> you want to write the upper 8-byte first so that break opcode is jammed
> in there before updating the remaining 41-bit immediate value? Otherwise,
> writing lower 8-byte first will end up with a small window that original
> opcode seeing corrupted immediate value.
> 
> 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* RE: [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2
  2006-11-02  3:11 [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2 bibo,mao
                   ` (13 preceding siblings ...)
  2006-11-02  9:05 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take bibo,mao
@ 2006-11-02  9:22 ` Chen, Kenneth W
  2006-11-02 19:50 ` Chen, Kenneth W
                   ` (4 subsequent siblings)
  19 siblings, 0 replies; 21+ messages in thread
From: Chen, Kenneth W @ 2006-11-02  9:22 UTC (permalink / raw)
  To: linux-ia64

Mao, Bibo wrote on Thursday, November 02, 2006 1:06 AM
> Chen, Kenneth W wrote:
> > Mao, Bibo wrote on Wednesday, November 01, 2006 11:53 PM
> >> else means that current cpu does not support 16 byte atomic operation.
> >> If kprobe address is on slot 0/2, then memcpy still can execute.
> > 
> > Do you allow kprobe insertion on "L+X" instruction?  It looks like so.
> > Then how do you maintain atomic update to L and X instruction right
> > now with memcpy?
> 
> From the code, It allows kprobes insertion on "L+X" instruction, and put
> BREAK code on the slot 2. IA64 manual says that "the X slot may encode
> break.i and nop.i in addition to any X-unit instruction".
> It seems that for MLX, BREAK opcode should insert the higher 8 bytes firstly.

That's exactly what I'm saying in the 2nd paragraph.


> > "atomic update" is not really the exact word I want to use here.  Don't
> > you want to write the upper 8-byte first so that break opcode is jammed
> > in there before updating the remaining 41-bit immediate value? Otherwise,
> > writing lower 8-byte first will end up with a small window that original
> > opcode seeing corrupted immediate value.

And same logic extends to inserting probe on slot 1 instruction.  The key is
to jam in break opcode, before updating the remaining unused immediate value.


^ permalink raw reply	[flat|nested] 21+ messages in thread

* RE: [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2
  2006-11-02  3:11 [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2 bibo,mao
                   ` (14 preceding siblings ...)
  2006-11-02  9:22 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2 Chen, Kenneth W
@ 2006-11-02 19:50 ` Chen, Kenneth W
  2006-11-02 19:57 ` Luck, Tony
                   ` (3 subsequent siblings)
  19 siblings, 0 replies; 21+ messages in thread
From: Chen, Kenneth W @ 2006-11-02 19:50 UTC (permalink / raw)
  To: linux-ia64

Keshavamurthy, Anil wrote on Thursday, November 02, 2006 11:39 AM
> > > > Do you allow kprobe insertion on "L+X" instruction?  It looks like so.
> > > > Then how do you maintain atomic update to L and X instruction right
> > > > now with memcpy?
> 
> 	Instruction Type "L+X" take two instruction slots i.e slot 1 and slot 2,
> but we only modify slot 2 to insert or remove the break and slot 2 nicely
> happens to be in the upper 8 bytes of the instruction bundle. So we don't have any
> issues here. For either inserting or removing break for "L+X" we don;t modify Slot 1
> value at all.

I wasn't aware that X slot can be encoded with break.i.  In that case, yes,
only slot 2 is modified and everything is fine. I guess the compatibility
mode saves the day for patching "L+X".


^ permalink raw reply	[flat|nested] 21+ messages in thread

* RE: [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2
  2006-11-02  3:11 [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2 bibo,mao
                   ` (15 preceding siblings ...)
  2006-11-02 19:50 ` Chen, Kenneth W
@ 2006-11-02 19:57 ` Luck, Tony
  2006-11-02 20:29 ` Chen, Kenneth W
                   ` (2 subsequent siblings)
  19 siblings, 0 replies; 21+ messages in thread
From: Luck, Tony @ 2006-11-02 19:57 UTC (permalink / raw)
  To: linux-ia64

> But seriously, considering patch slot 1 instruction with bits slot1[40:18]
> (which is nicely contained within the upper 8-byte of a bundle). The encoding
> for break instruction takes [40:27], and it left you with 9 bits to encode
> immediate value (actually 10 because bit 36 is also part of immediate value).
> With that, kprobe on slot1 can be extended to all CPU, not just montecito.

Sounds like with some careful trickery (and choice of break value ranges) you
might well be able to *insert* the breakpoint in slot1 with only 8-byte atomic
operations.

But I can't see how you plan to *remove* the breakpoint and restore the
original instruction.

-Tony

^ permalink raw reply	[flat|nested] 21+ messages in thread

* RE: [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2
  2006-11-02  3:11 [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2 bibo,mao
                   ` (16 preceding siblings ...)
  2006-11-02 19:57 ` Luck, Tony
@ 2006-11-02 20:29 ` Chen, Kenneth W
  2006-11-03  1:25 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take bibo,mao
  2006-11-03  1:55 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2 Chen, Kenneth W
  19 siblings, 0 replies; 21+ messages in thread
From: Chen, Kenneth W @ 2006-11-02 20:29 UTC (permalink / raw)
  To: linux-ia64

Luck, Tony wrote on Thursday, November 02, 2006 11:58 AM
> > But seriously, considering patch slot 1 instruction with bits slot1[40:18]
> > (which is nicely contained within the upper 8-byte of a bundle). The encoding
> > for break instruction takes [40:27], and it left you with 9 bits to encode
> > immediate value (actually 10 because bit 36 is also part of immediate value).
> > With that, kprobe on slot1 can be extended to all CPU, not just montecito.
> 
> Sounds like with some careful trickery (and choice of break value ranges) you
> might well be able to *insert* the breakpoint in slot1 with only 8-byte atomic
> operations.
> 
> But I can't see how you plan to *remove* the breakpoint and restore the
> original instruction.

Why not?  Restore the lower [17:0] bits first, then restore the upper [40:18].
I don't see a problem though.  There is a race window where the break probe
seeing a combined [17:0] from original instruction with its own value from
[26:18]. But why would that matter?  Insertion has the same challenge that
break will see a mixed immediate value between original and its own.  The
solution is to insert upper bits first, then the lower bits. Along with
teaching break handler to ignore all lower bits. I don't see how restore is
any different from insertion except the order in memory updates.

What matters is the opcode.  As long as the opcode is done in one operation,
the operand doesn't make much difference given the simplicity of break
instruction.

Would that work?  I could very well missed something, but haven't seen that
yet.


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take
  2006-11-02  3:11 [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2 bibo,mao
                   ` (17 preceding siblings ...)
  2006-11-02 20:29 ` Chen, Kenneth W
@ 2006-11-03  1:25 ` bibo,mao
  2006-11-03  1:55 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2 Chen, Kenneth W
  19 siblings, 0 replies; 21+ messages in thread
From: bibo,mao @ 2006-11-03  1:25 UTC (permalink / raw)
  To: linux-ia64

Chen, Kenneth W wrote:
> Luck, Tony wrote on Thursday, November 02, 2006 11:58 AM
>>> But seriously, considering patch slot 1 instruction with bits slot1[40:18]
>>> (which is nicely contained within the upper 8-byte of a bundle). The encoding
>>> for break instruction takes [40:27], and it left you with 9 bits to encode
>>> immediate value (actually 10 because bit 36 is also part of immediate value).
>>> With that, kprobe on slot1 can be extended to all CPU, not just montecito.
>> Sounds like with some careful trickery (and choice of break value ranges) you
>> might well be able to *insert* the breakpoint in slot1 with only 8-byte atomic
>> operations.
>>
>> But I can't see how you plan to *remove* the breakpoint and restore the
>> original instruction.
> 
> Why not?  Restore the lower [17:0] bits first, then restore the upper [40:18].
> I don't see a problem though.  There is a race window where the break probe
> seeing a combined [17:0] from original instruction with its own value from
> [26:18]. But why would that matter?  Insertion has the same challenge that
> break will see a mixed immediate value between original and its own.  The
> solution is to insert upper bits first, then the lower bits. Along with
> teaching break handler to ignore all lower bits. I don't see how restore is
> any different from insertion except the order in memory updates.
> 
> What matters is the opcode.  As long as the opcode is done in one operation,
> the operand doesn't make much difference given the simplicity of break
> instruction.
> 
> Would that work?  I could very well missed something, but haven't seen that
> yet.
And then on ia64_bad_break handler, it need discard break vector at the 
[17:6] bits of lower 8 bytes, only need judge upper [40:18] bits. And then
for slot 1 break opcode inserting, only higher 8 bytes opcode need change. 

> 
> 

^ permalink raw reply	[flat|nested] 21+ messages in thread

* RE: [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2
  2006-11-02  3:11 [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2 bibo,mao
                   ` (18 preceding siblings ...)
  2006-11-03  1:25 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take bibo,mao
@ 2006-11-03  1:55 ` Chen, Kenneth W
  19 siblings, 0 replies; 21+ messages in thread
From: Chen, Kenneth W @ 2006-11-03  1:55 UTC (permalink / raw)
  To: linux-ia64

Keshavamurthy, Anil wrote on Thursday, November 02, 2006 5:32 PM
> > > But I can't see how you plan to *remove* the breakpoint and restore the
> > > original instruction.
> 
> Restoring the break is very simple, just restore back the
> upper 23 bits in the upper 8 byte boundary( since we have not
> touched the bits 0:17 which is present in the lower 8 byte boundary)


Sure. If you go with that route, make sure it is documented that kprobe will
fire only if the original instruction gets executed.

A quick grep of kprobes.[ch] shows that the probe instruction always fire
independent of what the original "qp" field is. Above will change probing
behavior.  The former make a lot of sense that one should get a kprobe
trap only When probing instruction gets executed. The latter can equally
argue that unconditional trap in the execution stream has its beauty. I
suppose the jury is all in the hands of usage model.


^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2006-11-03  1:55 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-11-02  3:11 [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2 bibo,mao
2006-11-02  3:39 ` Keith Owens
2006-11-02  5:04 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take bibo,mao
2006-11-02  6:51 ` bibo,mao
2006-11-02  7:17 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2 Keith Owens
2006-11-02  7:22 ` Keith Owens
2006-11-02  7:25 ` Keith Owens
2006-11-02  7:27 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take bibo,mao
2006-11-02  7:32 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2 Keith Owens
2006-11-02  7:38 ` Chen, Kenneth W
2006-11-02  7:45 ` Chen, Kenneth W
2006-11-02  7:52 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take bibo,mao
2006-11-02  8:17 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2 Chen, Kenneth W
2006-11-02  8:56 ` Chen, Kenneth W
2006-11-02  9:05 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take bibo,mao
2006-11-02  9:22 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2 Chen, Kenneth W
2006-11-02 19:50 ` Chen, Kenneth W
2006-11-02 19:57 ` Luck, Tony
2006-11-02 20:29 ` Chen, Kenneth W
2006-11-03  1:25 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take bibo,mao
2006-11-03  1:55 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2 Chen, Kenneth W

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox