From: "bibo,mao" <bibo.mao@intel.com>
To: linux-ia64@vger.kernel.org
Subject: Re: [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take
Date: Thu, 02 Nov 2006 06:51:18 +0000 [thread overview]
Message-ID: <45499566.7020800@intel.com> (raw)
In-Reply-To: <454961EE.4070608@intel.com>
Keith Owens wrote:
> "bibo,mao" (on Thu, 02 Nov 2006 11:11:42 +0800) wrote:
>> +#define ia64_ld16(low, addr) \
>> + asm volatile(";;ld16 %0=[%1];;":"=r"(low):"r"(addr):"memory")
>> +#define ia64_st16(low, addr) \
>> + asm volatile(";;st16 [%1]=%0;;"::"r"(low),"r"(addr):"memory")
>> ...
>> +#define ia64_st16(low, addr) __st16(__sttype_none, __sthint_none, addr, low)
>> +#define ia64_ld16(low, addr) \
>> + low = __ld16(__ldtype_none, __ldtype_none, addr)
>> +
>
> ld16 clobbers ar.csd, that needs to be added to the definition of
> ia64_ld16.
It seems that gcc does not support an inline asm clobber for the ar.csd register,
so here I leave the clobber list empty.
>
> ia64_ld16 does not need a memory clobber.
The memory clobber is removed from ia64_ld16 here.
>
> Strictly speaking, ia64_st16 does not need a memory clobber. addr
> should be a write operand (not read as you have it) and gcc should see
> that addr is clobbered. However we clobber 16 bytes starting at addr
> and I suspect that gcc has no way of telling about the second set of 8
> bytes. In this case, we may have to stick with a memory clobber on
> ia64_st16.
>
>> +#define kprobe_update_bundle(dest, src) \
>> +do { \
>> + unsigned long low; \
>> + ia64_ld16(low, src); \
>> + ia64_st16(low, dest); \
>> +} while (0)
>
> Using unsigned long (8 bytes) is misleading for a 16 byte operation.
> Not sure what we can do about that.
-------------------------------------------------------------------
diff -Nrup -X 2.6.19-rc2.org/Documentation/dontdiff 2.6.19-rc2.org/arch/ia64/kernel/kprobes.c 2.6.19-rc2/arch/ia64/kernel/kprobes.c
--- 2.6.19-rc2.org/arch/ia64/kernel/kprobes.c 2006-10-27 16:39:29.000000000 +0800
+++ 2.6.19-rc2/arch/ia64/kernel/kprobes.c 2006-11-01 19:05:18.000000000 +0800
@@ -296,7 +296,7 @@ static int __kprobes valid_kprobe_addr(i
return -EINVAL;
}
- if (slot == 1 && bundle_encoding[template][1] != L) {
+ if (slot == 1 && bundle_encoding[template][1] != L && !ATOMIC_UPDATE) {
printk(KERN_WARNING "Inserting kprobes on slot #1 "
"is not supported\n");
return -EINVAL;
@@ -448,6 +448,12 @@ int __kprobes arch_prepare_kprobe(struct
p->ainsn.insn = get_insn_slot();
if (!p->ainsn.insn)
return -ENOMEM;
+ if (unlikely(((unsigned long)&p->opcode & 0xF)
+ || ((unsigned long)p->ainsn.insn & 0xF))) {
+ printk(KERN_WARNING "Kprobes opcode 16-bytes unalignment\n ");
+ return -EINVAL;
+ }
+
memcpy(&p->opcode, kprobe_addr, sizeof(kprobe_opcode_t));
memcpy(p->ainsn.insn, kprobe_addr, sizeof(kprobe_opcode_t));
@@ -463,7 +469,10 @@ void __kprobes arch_arm_kprobe(struct kp
flush_icache_range((unsigned long)p->ainsn.insn,
(unsigned long)p->ainsn.insn + sizeof(kprobe_opcode_t));
- memcpy((char *)arm_addr, &p->opcode, sizeof(kprobe_opcode_t));
+ if (ATOMIC_UPDATE)
+ kprobe_update_bundle((void *)arm_addr, (void *)&p->opcode);
+ else
+ memcpy((char *)arm_addr, &p->opcode, sizeof(kprobe_opcode_t));
flush_icache_range(arm_addr, arm_addr + sizeof(kprobe_opcode_t));
}
@@ -473,8 +482,11 @@ void __kprobes arch_disarm_kprobe(struct
unsigned long arm_addr = addr & ~0xFULL;
/* p->ainsn.insn contains the original unaltered kprobe_opcode_t */
- memcpy((char *) arm_addr, (char *) p->ainsn.insn,
- sizeof(kprobe_opcode_t));
+ if (ATOMIC_UPDATE)
+ kprobe_update_bundle((void *)arm_addr, (void *) p->ainsn.insn);
+ else
+ memcpy((char *) arm_addr, (char *) p->ainsn.insn,
+ sizeof(kprobe_opcode_t));
flush_icache_range(arm_addr, arm_addr + sizeof(kprobe_opcode_t));
}
diff -Nrup -X 2.6.19-rc2.org/Documentation/dontdiff 2.6.19-rc2.org/include/asm-ia64/gcc_intrin.h 2.6.19-rc2/include/asm-ia64/gcc_intrin.h
--- 2.6.19-rc2.org/include/asm-ia64/gcc_intrin.h 2005-08-29 07:41:01.000000000 +0800
+++ 2.6.19-rc2/include/asm-ia64/gcc_intrin.h 2006-11-02 15:28:41.000000000 +0800
@@ -598,4 +598,23 @@ do { \
:: "r"((x)) : "p6", "p7", "memory"); \
} while (0)
+/* The ld16/st16 instructions fault if issued to UC, UCE, or WC memory.
+ * An aligned ld16/st16 instruction is performed as an atomic 16-byte
+ * memory reference. For these instructions, the address specified must
+ * be 16-byte aligned.
+ * The ia64_ld16 macro loads the lower 64 bits into the low parameter and
+ * puts the higher 64 bits into the ar.csd register. To get the full
+ * 128-bit value, use:
+ * ia64_ld16(low, addr);
+ * high = ia64_getreg(_IA64_REG_AR_CSD);
+ * The ia64_st16 macro stores the low parameter to the lower 64 bits and
+ * the ar.csd register to the higher 64 bits. To store a 128-bit value,
+ * use:
+ * ia64_setreg(_IA64_REG_AR_CSD, high);
+ * ia64_st16(low, addr);
+ */
+#define ia64_ld16(low, addr) \
+ asm volatile(";;ld16 %0, ar.csd =[%1];; ":"=r"(low): "r"(addr))
+#define ia64_st16(low, addr) \
+ asm volatile(";;st16 [%1]=%0, ar.csd;; "::"r"(low),"r"(addr):"memory")
#endif /* _ASM_IA64_GCC_INTRIN_H */
diff -Nrup -X 2.6.19-rc2.org/Documentation/dontdiff 2.6.19-rc2.org/include/asm-ia64/intel_intrin.h 2.6.19-rc2/include/asm-ia64/intel_intrin.h
--- 2.6.19-rc2.org/include/asm-ia64/intel_intrin.h 2006-07-24 10:47:13.000000000 +0800
+++ 2.6.19-rc2/include/asm-ia64/intel_intrin.h 2006-11-01 18:38:13.000000000 +0800
@@ -152,6 +152,10 @@ do { \
} \
} while (0)
+#define ia64_st16(low, addr) __st16(__sttype_none, __sthint_none, addr, low)
+#define ia64_ld16(low, addr) \
+ low = __ld16(__ldtype_none, __ldtype_none, addr)
+
#define __builtin_trap() __break(0);
#endif /* _ASM_IA64_INTEL_INTRIN_H */
diff -Nrup -X 2.6.19-rc2.org/Documentation/dontdiff 2.6.19-rc2.org/include/asm-ia64/kprobes.h 2.6.19-rc2/include/asm-ia64/kprobes.h
--- 2.6.19-rc2.org/include/asm-ia64/kprobes.h 2006-10-27 16:39:34.000000000 +0800
+++ 2.6.19-rc2/include/asm-ia64/kprobes.h 2006-11-02 15:20:54.000000000 +0800
@@ -88,6 +88,7 @@ struct kprobe_ctlblk {
#define SLOT0_OPCODE_SHIFT (37)
#define SLOT1_p1_OPCODE_SHIFT (37 - (64-46))
#define SLOT2_OPCODE_SHIFT (37)
+#define ATOMIC_UPDATE (local_cpu_data->features & ITANIUM_CPUID4_AO)
#define INDIRECT_CALL_OPCODE (1)
#define IP_RELATIVE_CALL_OPCODE (5)
@@ -96,6 +97,17 @@ struct kprobe_ctlblk {
#define LONG_BRANCH_OPCODE (0xC)
#define LONG_CALL_OPCODE (0xD)
#define flush_insn_slot(p) do { } while (0)
+/* This macro first loads the 128-bit value from the src address
+ * and then stores that 128-bit value to the dest address. It uses
+ * the ar.csd register to carry the higher 64 bits, and the lower
+ * 64 bits are passed through the local variable low.
+ */
+#define kprobe_update_bundle(dest, src) \
+do { \
+ unsigned long low; \
+ ia64_ld16(low, src); \
+ ia64_st16(low, dest); \
+} while (0)
typedef struct kprobe_opcode {
bundle_t bundle;
diff -Nrup -X 2.6.19-rc2.org/Documentation/dontdiff 2.6.19-rc2.org/include/asm-ia64/kregs.h 2.6.19-rc2/include/asm-ia64/kregs.h
--- 2.6.19-rc2.org/include/asm-ia64/kregs.h 2005-08-29 07:41:01.000000000 +0800
+++ 2.6.19-rc2/include/asm-ia64/kregs.h 2006-11-01 18:54:37.000000000 +0800
@@ -160,4 +160,7 @@
#define IA64_ISR_CODE_LFETCH 4
#define IA64_ISR_CODE_PROBEF 5
+/* CPUID 4 Register */
+#define ITANIUM_CPUID4_AO_BIT 2
+#define ITANIUM_CPUID4_AO (__IA64_UL(1) << ITANIUM_CPUID4_AO_BIT)
#endif /* _ASM_IA64_kREGS_H */
next prev parent reply other threads:[~2006-11-02 6:51 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-11-02 3:11 [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2 bibo,mao
2006-11-02 3:39 ` Keith Owens
2006-11-02 5:04 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take bibo,mao
2006-11-02 6:51 ` bibo,mao [this message]
2006-11-02 7:17 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2 Keith Owens
2006-11-02 7:22 ` Keith Owens
2006-11-02 7:25 ` Keith Owens
2006-11-02 7:27 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take bibo,mao
2006-11-02 7:32 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2 Keith Owens
2006-11-02 7:38 ` Chen, Kenneth W
2006-11-02 7:45 ` Chen, Kenneth W
2006-11-02 7:52 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take bibo,mao
2006-11-02 8:17 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2 Chen, Kenneth W
2006-11-02 8:56 ` Chen, Kenneth W
2006-11-02 9:05 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take bibo,mao
2006-11-02 9:22 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2 Chen, Kenneth W
2006-11-02 19:50 ` Chen, Kenneth W
2006-11-02 19:57 ` Luck, Tony
2006-11-02 20:29 ` Chen, Kenneth W
2006-11-03 1:25 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take bibo,mao
2006-11-03 1:55 ` [PATCH]IA64 trap code 16 bytes atomic copy on montecito, take 2 Chen, Kenneth W
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=45499566.7020800@intel.com \
--to=bibo.mao@intel.com \
--cc=linux-ia64@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.