From: "bibo,mao" <bibo.mao@intel.com>
To: linux-ia64@vger.kernel.org
Subject: Re: [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take
Date: Thu, 02 Nov 2006 06:51:18 +0000 [thread overview]
Message-ID: <45499566.7020800@intel.com> (raw)
In-Reply-To: <454961EE.4070608@intel.com>
Keith Owens wrote:
> "bibo,mao" (on Thu, 02 Nov 2006 11:11:42 +0800) wrote:
>> +#define ia64_ld16(low, addr) \
>> + asm volatile(";;ld16 %0=[%1];;":"=r"(low):"r"(addr):"memory")
>> +#define ia64_st16(low, addr) \
>> + asm volatile(";;st16 [%1]=%0;;"::"r"(low),"r"(addr):"memory")
>> ...
>> +#define ia64_st16(low, addr) __st16(__sttype_none, __sthint_none, addr, low)
>> +#define ia64_ld16(low, addr) \
>> + low = __ld16(__ldtype_none, __ldtype_none, addr)
>> +
>
> ld16 clobbers ar.csd, that needs to be added to the definition of
> ia64_ld16.
It seems that gcc does not support an inline asm clobber for the ar.csd register,
so here I leave the clobber list empty.
>
> ia64_ld16 does not need a memory clobber.
The memory clobber is removed here.
>
> Strictly speaking, ia64_st16 does not need a memory clobber. addr
> should be a write operand (not read as you have it) and gcc should see
> that addr is clobbered. However we clobber 16 bytes starting at addr
> and I suspect that gcc has no way of telling about the second set of 8
> bytes. In this case, we may have to stick with a memory clobber on
> ia64_st16.
>
>> +#define kprobe_update_bundle(dest, src) \
>> +do { \
>> + unsigned long low; \
>> + ia64_ld16(low, src); \
>> + ia64_st16(low, dest); \
>> +} while (0)
>
> Using unsigned long (8 bytes) is misleading for a 16 byte operation.
> Not sure what we can do about that.
-------------------------------------------------------------------
diff -Nrup -X 2.6.19-rc2.org/Documentation/dontdiff 2.6.19-rc2.org/arch/ia64/kernel/kprobes.c 2.6.19-rc2/arch/ia64/kernel/kprobes.c
--- 2.6.19-rc2.org/arch/ia64/kernel/kprobes.c 2006-10-27 16:39:29.000000000 +0800
+++ 2.6.19-rc2/arch/ia64/kernel/kprobes.c 2006-11-01 19:05:18.000000000 +0800
@@ -296,7 +296,7 @@ static int __kprobes valid_kprobe_addr(i
return -EINVAL;
}
- if (slot == 1 && bundle_encoding[template][1] != L) {
+ if (slot == 1 && bundle_encoding[template][1] != L && !ATOMIC_UPDATE) {
printk(KERN_WARNING "Inserting kprobes on slot #1 "
"is not supported\n");
return -EINVAL;
@@ -448,6 +448,12 @@ int __kprobes arch_prepare_kprobe(struct
p->ainsn.insn = get_insn_slot();
if (!p->ainsn.insn)
return -ENOMEM;
+ if (unlikely(((unsigned long)&p->opcode & 0xF)
+ || ((unsigned long)p->ainsn.insn & 0xF))) {
+ printk(KERN_WARNING "Kprobes opcode 16-bytes unalignment\n ");
+ return -EINVAL;
+ }
+
memcpy(&p->opcode, kprobe_addr, sizeof(kprobe_opcode_t));
memcpy(p->ainsn.insn, kprobe_addr, sizeof(kprobe_opcode_t));
@@ -463,7 +469,10 @@ void __kprobes arch_arm_kprobe(struct kp
flush_icache_range((unsigned long)p->ainsn.insn,
(unsigned long)p->ainsn.insn + sizeof(kprobe_opcode_t));
- memcpy((char *)arm_addr, &p->opcode, sizeof(kprobe_opcode_t));
+ if (ATOMIC_UPDATE)
+ kprobe_update_bundle((void *)arm_addr, (void *)&p->opcode);
+ else
+ memcpy((char *)arm_addr, &p->opcode, sizeof(kprobe_opcode_t));
flush_icache_range(arm_addr, arm_addr + sizeof(kprobe_opcode_t));
}
@@ -473,8 +482,11 @@ void __kprobes arch_disarm_kprobe(struct
unsigned long arm_addr = addr & ~0xFULL;
/* p->ainsn.insn contains the original unaltered kprobe_opcode_t */
- memcpy((char *) arm_addr, (char *) p->ainsn.insn,
- sizeof(kprobe_opcode_t));
+ if (ATOMIC_UPDATE)
+ kprobe_update_bundle((void *)arm_addr, (void *) p->ainsn.insn);
+ else
+ memcpy((char *) arm_addr, (char *) p->ainsn.insn,
+ sizeof(kprobe_opcode_t));
flush_icache_range(arm_addr, arm_addr + sizeof(kprobe_opcode_t));
}
diff -Nrup -X 2.6.19-rc2.org/Documentation/dontdiff 2.6.19-rc2.org/include/asm-ia64/gcc_intrin.h 2.6.19-rc2/include/asm-ia64/gcc_intrin.h
--- 2.6.19-rc2.org/include/asm-ia64/gcc_intrin.h 2005-08-29 07:41:01.000000000 +0800
+++ 2.6.19-rc2/include/asm-ia64/gcc_intrin.h 2006-11-02 15:28:41.000000000 +0800
@@ -598,4 +598,23 @@ do { \
:: "r"((x)) : "p6", "p7", "memory"); \
} while (0)
+/* ld16/st16 instructions will fault if issued to UC, UCE, or WC memory.
+ * An aligned ld16/st16 instruction is performed as an atomic 16-byte
+ * memory reference. For these instructions, the address specified must
+ * be 16-byte aligned.
+ * The ia64_ld16 macro loads the lower 64 bits into the low parameter, and
+ * the higher 64 bits are put into the ar.csd register. To get the full
+ * 128-bit value, this can be used:
+ * ia64_ld16(low, addr);
+ * high = ia64_getreg(_IA64_REG_AR_CSD);
+ * The ia64_st16 macro stores the low parameter to the lower 64 bits, and
+ * the ar.csd register to the higher 64 bits. To store a 128-bit value,
+ * this can be used:
+ * ia64_setreg(_IA64_REG_AR_CSD, high);
+ * ia64_st16(low, addr);
+ */
+#define ia64_ld16(low, addr) \
+ asm volatile(";;ld16 %0, ar.csd =[%1];; ":"=r"(low): "r"(addr))
+#define ia64_st16(low, addr) \
+ asm volatile(";;st16 [%1]=%0, ar.csd;; "::"r"(low),"r"(addr):"memory")
#endif /* _ASM_IA64_GCC_INTRIN_H */
diff -Nrup -X 2.6.19-rc2.org/Documentation/dontdiff 2.6.19-rc2.org/include/asm-ia64/intel_intrin.h 2.6.19-rc2/include/asm-ia64/intel_intrin.h
--- 2.6.19-rc2.org/include/asm-ia64/intel_intrin.h 2006-07-24 10:47:13.000000000 +0800
+++ 2.6.19-rc2/include/asm-ia64/intel_intrin.h 2006-11-01 18:38:13.000000000 +0800
@@ -152,6 +152,10 @@ do { \
} \
} while (0)
+#define ia64_st16(low, addr) __st16(__sttype_none, __sthint_none, addr, low)
+#define ia64_ld16(low, addr) \
+ low = __ld16(__ldtype_none, __ldtype_none, addr)
+
#define __builtin_trap() __break(0);
#endif /* _ASM_IA64_INTEL_INTRIN_H */
diff -Nrup -X 2.6.19-rc2.org/Documentation/dontdiff 2.6.19-rc2.org/include/asm-ia64/kprobes.h 2.6.19-rc2/include/asm-ia64/kprobes.h
--- 2.6.19-rc2.org/include/asm-ia64/kprobes.h 2006-10-27 16:39:34.000000000 +0800
+++ 2.6.19-rc2/include/asm-ia64/kprobes.h 2006-11-02 15:20:54.000000000 +0800
@@ -88,6 +88,7 @@ struct kprobe_ctlblk {
#define SLOT0_OPCODE_SHIFT (37)
#define SLOT1_p1_OPCODE_SHIFT (37 - (64-46))
#define SLOT2_OPCODE_SHIFT (37)
+#define ATOMIC_UPDATE (local_cpu_data->features & ITANIUM_CPUID4_AO)
#define INDIRECT_CALL_OPCODE (1)
#define IP_RELATIVE_CALL_OPCODE (5)
@@ -96,6 +97,17 @@ struct kprobe_ctlblk {
#define LONG_BRANCH_OPCODE (0xC)
#define LONG_CALL_OPCODE (0xD)
#define flush_insn_slot(p) do { } while (0)
+/* This macro first loads a 128-bit value from the src address
+ * and then stores it to the dest address. It uses the ar.csd
+ * register to carry the higher 64 bits, and the lower 64 bits
+ * are passed through the local variable low.
+ */
+#define kprobe_update_bundle(dest, src) \
+do { \
+ unsigned long low; \
+ ia64_ld16(low, src); \
+ ia64_st16(low, dest); \
+} while (0)
typedef struct kprobe_opcode {
bundle_t bundle;
diff -Nrup -X 2.6.19-rc2.org/Documentation/dontdiff 2.6.19-rc2.org/include/asm-ia64/kregs.h 2.6.19-rc2/include/asm-ia64/kregs.h
--- 2.6.19-rc2.org/include/asm-ia64/kregs.h 2005-08-29 07:41:01.000000000 +0800
+++ 2.6.19-rc2/include/asm-ia64/kregs.h 2006-11-01 18:54:37.000000000 +0800
@@ -160,4 +160,7 @@
#define IA64_ISR_CODE_LFETCH 4
#define IA64_ISR_CODE_PROBEF 5
+/* CPUID 4 Register */
+#define ITANIUM_CPUID4_AO_BIT 2
+#define ITANIUM_CPUID4_AO (__IA64_UL(1) << ITANIUM_CPUID4_AO_BIT)
#endif /* _ASM_IA64_kREGS_H */
next prev parent reply other threads:[~2006-11-02 6:51 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-11-02 3:11 [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2 bibo,mao
2006-11-02 3:39 ` Keith Owens
2006-11-02 5:04 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take bibo,mao
2006-11-02 6:51 ` bibo,mao [this message]
2006-11-02 7:17 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2 Keith Owens
2006-11-02 7:22 ` Keith Owens
2006-11-02 7:25 ` Keith Owens
2006-11-02 7:27 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take bibo,mao
2006-11-02 7:32 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2 Keith Owens
2006-11-02 7:38 ` Chen, Kenneth W
2006-11-02 7:45 ` Chen, Kenneth W
2006-11-02 7:52 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take bibo,mao
2006-11-02 8:17 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2 Chen, Kenneth W
2006-11-02 8:56 ` Chen, Kenneth W
2006-11-02 9:05 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take bibo,mao
2006-11-02 9:22 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2 Chen, Kenneth W
2006-11-02 19:50 ` Chen, Kenneth W
2006-11-02 19:57 ` Luck, Tony
2006-11-02 20:29 ` Chen, Kenneth W
2006-11-03 1:25 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take bibo,mao
2006-11-03 1:55 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2 Chen, Kenneth W
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=45499566.7020800@intel.com \
--to=bibo.mao@intel.com \
--cc=linux-ia64@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox