* [patch 1/2] remove per-cpu ia64_phys_stacked_size_p8
@ 2006-02-24 2:33 Chen, Kenneth W
2006-02-24 10:08 ` Christian Hildner
` (5 more replies)
0 siblings, 6 replies; 7+ messages in thread
From: Chen, Kenneth W @ 2006-02-24 2:33 UTC (permalink / raw)
To: linux-ia64
It's not efficient to use a per-cpu variable just to store
how many physical stack registers a cpu has. Ever since the
first incarnation of ia64 up to the upcoming Montecito processor, that
variable has been "glued" to 96. Having a variable in memory means
that the kernel is burning an extra cacheline access on every
syscall and kernel exit path. Such a "static" value is better
served by the instruction patching utility that exists today.
Convert ia64_phys_stacked_size_p8 into dynamic insn patching.
This also has a pleasant side effect of eliminating access to
per-cpu area while psr.ic=0 in the kernel exit path. (fixable
for per-cpu DTC work, but why bother?)
There are some concerns about the default value that is encoded in
the instruction in the kernel image. It shouldn't be a concern.
The reasons are:
(1) cpu_init() is called at CPU initialization. In there, we
find out physical stack register size from PAL and patch
two instructions in kernel exit code. The code in question
can not be executed before the patching is done.
(2) current implementation stores zero in ia64_phys_stacked_size_p8,
and that's what the current kernel exit path loads the value with.
With the new code, it is equivalent that we store reg size 96
in ia64_phys_stacked_size_p8, thus creating a better safety net.
Given (1) above can never fail, having (2) is just a bonus.
All in all, this patch allows one fewer memory reference in the kernel
exit path, thus reducing syscall and interrupt return latency, and
avoids displacing potentially useful data from the CPU cache.
Signed-off-by: Ken Chen <kenneth.w.chen@intel.com>
--- linux-2.6.15/arch/ia64/kernel/entry.S.orig 2006-02-23 15:30:59.146883636 -0800
+++ linux-2.6.15/arch/ia64/kernel/entry.S 2006-02-23 15:33:19.305085044 -0800
@@ -768,7 +768,7 @@ ENTRY(ia64_leave_syscall)
ld8.fill r15=[r3] // M0|1 restore r15
mov b6=r18 // I0 restore b6
- addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0 // A
+ LOAD_PHYS_STACK_REG_SIZE(r17)
mov f9=f0 // F clear f9
(pKStk) br.cond.dpnt.many skip_rbs_switch // B
@@ -776,7 +776,6 @@ ENTRY(ia64_leave_syscall)
shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition
cover // B add current frame into dirty partition & set cr.ifs
;;
-(pUStk) ld4 r17=[r17] // M0|1 r17 = cpu_data->phys_stacked_size_p8
mov r19=ar.bsp // M2 get new backing store pointer
mov f10=f0 // F clear f10
@@ -954,9 +953,7 @@ GLOBAL_ENTRY(ia64_leave_kernel)
shr.u r18=r19,16 // get byte size of existing "dirty" partition
;;
mov r16=ar.bsp // get existing backing store pointer
- addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0
- ;;
- ld4 r17=[r17] // r17 = cpu_data->phys_stacked_size_p8
+ LOAD_PHYS_STACK_REG_SIZE(r17)
(pKStk) br.cond.dpnt skip_rbs_switch
/*
--- linux-2.6.15/arch/ia64/kernel/vmlinux.lds.S.orig 2006-01-02 19:21:10.000000000 -0800
+++ linux-2.6.15/arch/ia64/kernel/vmlinux.lds.S 2006-02-23 15:31:16.810945919 -0800
@@ -84,6 +84,13 @@ SECTIONS
__end___mckinley_e9_bundles = .;
}
+ .data.patch.phys_stack_reg : AT(ADDR(.data.patch.phys_stack_reg) - LOAD_OFFSET)
+ {
+ __start___phys_stack_reg_patchlist = .;
+ *(.data.patch.phys_stack_reg)
+ __end___phys_stack_reg_patchlist = .;
+ }
+
/* Global data */
_data = .;
--- linux-2.6.15/arch/ia64/kernel/setup.c.orig 2006-02-23 15:30:59.160555511 -0800
+++ linux-2.6.15/arch/ia64/kernel/setup.c 2006-02-23 15:31:16.810945919 -0800
@@ -75,7 +75,6 @@ extern void ia64_setup_printk_clock(void
DEFINE_PER_CPU(struct cpuinfo_ia64, cpu_info);
DEFINE_PER_CPU(unsigned long, local_per_cpu_offset);
-DEFINE_PER_CPU(unsigned long, ia64_phys_stacked_size_p8);
unsigned long ia64_cycles_per_usec;
struct ia64_boot_param *ia64_boot_param;
struct screen_info screen_info;
@@ -767,6 +766,7 @@ void
cpu_init (void)
{
extern void __devinit ia64_mmu_init (void *);
+ static unsigned long max_num_phys_stacked = IA64_NUM_PHYS_STACK_REG;
unsigned long num_phys_stacked;
pal_vm_info_2_u_t vmi;
unsigned int max_ctx;
@@ -880,7 +880,10 @@ cpu_init (void)
num_phys_stacked = 96;
}
/* size of physical stacked register partition plus 8 bytes: */
- __get_cpu_var(ia64_phys_stacked_size_p8) = num_phys_stacked*8 + 8;
+ if (num_phys_stacked > max_num_phys_stacked) {
+ ia64_patch_phys_stack_reg(num_phys_stacked*8 + 8);
+ max_num_phys_stacked = num_phys_stacked;
+ }
platform_cpu_init();
pm_idle = default_idle;
}
--- linux-2.6.15/arch/ia64/kernel/patch.c.orig 2006-01-02 19:21:10.000000000 -0800
+++ linux-2.6.15/arch/ia64/kernel/patch.c 2006-02-23 15:31:16.811922482 -0800
@@ -195,3 +195,23 @@ ia64_patch_gate (void)
ia64_patch_vtop(START(vtop), END(vtop));
ia64_patch_mckinley_e9(START(mckinley_e9), END(mckinley_e9));
}
+
+void ia64_patch_phys_stack_reg(unsigned long val)
+{ /* Rewrite the immediate of every insn recorded on the phys_stack_reg patch list from val. */
+ s32 * offp = (s32 *) __start___phys_stack_reg_patchlist; /* first 32-bit list entry */
+ s32 * end = (s32 *) __end___phys_stack_reg_patchlist; /* one past the last entry */
+ u64 ip, mask, imm;
+
+ /* see instruction format A4: adds r1 = imm13, r3 (only the low 13 immediate bits are patched) */
+ mask = (0x3fUL << 27) | (0x7f << 13); /* insn bits 27-32 and 13-19 */
+ imm = (((val >> 7) & 0x3f) << 27) | (val & 0x7f) << 13; /* val[12:7] -> bits 27-32, val[6:0] -> bits 13-19 */
+
+ while (offp < end) {
+ ip = (u64) offp + *offp; /* entry is self-relative: entry address + stored offset */
+ ia64_patch(ip, mask, imm); /* presumably replaces the masked bits at ip with imm -- helper not shown here */
+ ia64_fc(ip); /* presumably flushes the cache line holding the patched insn -- TODO confirm */
+ ++offp;
+ }
+ ia64_sync_i(); /* NOTE(review): looks like the sync.i/srlz.i pair that makes the patched insns visible to fetch -- confirm */
+ ia64_srlz_i();
+}
--- linux-2.6.15/include/asm-ia64/processor.h.orig 2006-02-23 15:31:02.317782034 -0800
+++ linux-2.6.15/include/asm-ia64/processor.h 2006-02-23 16:51:29.300144779 -0800
@@ -20,6 +20,7 @@
#include <asm/ptrace.h>
#include <asm/ustack.h>
+#define IA64_NUM_PHYS_STACK_REG 0
#define IA64_NUM_DBG_REGS 8
/*
* Limits for PMC and PMD are set to less than maximum architected values
--- linux-2.6.15/include/asm-ia64/asmmacro.h.orig 2006-01-02 19:21:10.000000000 -0800
+++ linux-2.6.15/include/asm-ia64/asmmacro.h 2006-02-23 15:31:16.812899044 -0800
@@ -90,6 +90,16 @@ name:
#endif
/*
+ * If physical stack register size is different from IA64_NUM_PHYS_STACK_REG,
+ * dynamically patch the kernel for correct size.
+ */
+ .section ".data.patch.phys_stack_reg", "a"
+ .previous
+#define LOAD_PHYS_STACK_REG_SIZE(reg) \
+[1:] adds reg=IA64_NUM_PHYS_STACK_REG*8+8,r0; \
+ .xdata4 ".data.patch.phys_stack_reg", 1b-.
+
+/*
* Up until early 2004, use of .align within a function caused bad unwind info.
* TEXT_ALIGN(n) expands into ".align n" if a fixed GAS is available or into nothing
* otherwise.
--- linux-2.6.15/include/asm-ia64/patch.h.orig 2006-01-02 19:21:10.000000000 -0800
+++ linux-2.6.15/include/asm-ia64/patch.h 2006-02-23 15:31:16.812899044 -0800
@@ -20,6 +20,7 @@ extern void ia64_patch_imm60 (u64 insn_a
extern void ia64_patch_mckinley_e9 (unsigned long start, unsigned long end);
extern void ia64_patch_vtop (unsigned long start, unsigned long end);
+extern void ia64_patch_phys_stack_reg(unsigned long val);
extern void ia64_patch_gate (void);
#endif /* _ASM_IA64_PATCH_H */
--- linux-2.6.15/include/asm-ia64/sections.h.orig 2006-01-02 19:21:10.000000000 -0800
+++ linux-2.6.15/include/asm-ia64/sections.h 2006-02-23 15:31:16.812899044 -0800
@@ -11,6 +11,7 @@
extern char __per_cpu_start[], __per_cpu_end[], __phys_per_cpu_start[];
extern char __start___vtop_patchlist[], __end___vtop_patchlist[];
extern char __start___mckinley_e9_bundles[], __end___mckinley_e9_bundles[];
+extern char __start___phys_stack_reg_patchlist[], __end___phys_stack_reg_patchlist[];
extern char __start_gate_section[];
extern char __start_gate_mckinley_e9_patchlist[], __end_gate_mckinley_e9_patchlist[];
extern char __start_gate_vtop_patchlist[], __end_gate_vtop_patchlist[];
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [patch 1/2] remove per-cpu ia64_phys_stacked_size_p8
2006-02-24 2:33 [patch 1/2] remove per-cpu ia64_phys_stacked_size_p8 Chen, Kenneth W
@ 2006-02-24 10:08 ` Christian Hildner
2006-02-24 18:58 ` Chen, Kenneth W
` (4 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Christian Hildner @ 2006-02-24 10:08 UTC (permalink / raw)
To: linux-ia64
Kenneth,
self-modifying code isn't the straightforward way of programming. So
wouldn't it be an idea to let the code crash instead of silently working
with a potentially wrong number of registers here, if for any reason the
patch mechanism doesn't work? I suggest using a break instruction
(with a non-syscall number) to indicate a failing/missing patch here. That
would add another level of security here.
Christian
Chen, Kenneth W schrieb:
>It's not efficient to use a per-cpu variable just to store
>how many physical stack register a cpu has. Ever since the
>incarnation of ia64 up till upcoming Montecito processor, that
>variable has "glued" to 96. Having a variable in memory means
>that the kernel is burning an extra cacheline access on every
>syscall and kernel exit path. Such "static" value is better
>served with the instruction patching utility exists today.
>Convert ia64_phys_stacked_size_p8 into dynamic insn patching.
>
>This also has a pleasant side effect of eliminating access to
>per-cpu area while psr.ic=0 in the kernel exit path. (fixable
>for per-cpu DTC work, but why bother?)
>
>There are some concerns with the default value that the instruc-
>tion encoded in the kernel image. It shouldn't be concerned.
>The reasons are:
>
>(1) cpu_init() is called at CPU initialization. In there, we
> find out physical stack register size from PAL and patch
> two instructions in kernel exit code. The code in question
> can not be executed before the patching is done.
>
>(2) current implementation stores zero in ia64_phys_stacked_size_p8,
> and that's what the current kernel exit path loads the value with.
> With the new code, it is equivalent that we store reg size 96
> in ia64_phys_stacked_size_p8, thus creating a better safety net.
> Given (1) above can never fail, having (2) is just a bonus.
>
>
>All in all, this patch allow one less memory reference in the kernel
>exit path, thus reducing syscall and interrupt return latency; and
>avoid polluting potential useful data in the CPU cache.
>
>
>
^ permalink raw reply [flat|nested] 7+ messages in thread
* RE: [patch 1/2] remove per-cpu ia64_phys_stacked_size_p8
2006-02-24 2:33 [patch 1/2] remove per-cpu ia64_phys_stacked_size_p8 Chen, Kenneth W
2006-02-24 10:08 ` Christian Hildner
@ 2006-02-24 18:58 ` Chen, Kenneth W
2006-02-24 19:13 ` Chen, Kenneth W
` (3 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Chen, Kenneth W @ 2006-02-24 18:58 UTC (permalink / raw)
To: linux-ia64
Christian Hildner wrote on Friday, February 24, 2006 2:08 AM
> self-modifing code isn't the straight forward way of programming. So
> wouldn't it be an idea to let the code crash instead of silently work
> with a potentially wrong number of registers here, if by any reason the
> patch mechanism doesn't work.
This argument is very biased. A bug is a bug, regardless of its
origin or the mechanism behind it. A programming error such as wrongly
initializing a value has the same severity as patching the wrong code.
It's just way too biased to say a bug in the patching mechanism is more
severe than any other buggy code.
Plus, self-modifying code in the kernel is everywhere: look at the core
of the core kernel:
(1) low level vhpt handlers: vhpt_miss and nested_dtlb_miss
(2) fsyscall table
(3) fsys bubble down
(4) mckinley_e9
All of these do instruction patching. I don't suppose you would recommend that all
instances listed above be changed?
> I suppose the usage of a break instruction
> (with non_syscall number) to indicate failing/missing patch here. That
> would add another level of security here.
This argument is equally flawed. If you don't have any trust in patching
mechanism in previous argument, why would you trust that patch out a break
instruction is going to be any better?
- Ken
^ permalink raw reply [flat|nested] 7+ messages in thread
* RE: [patch 1/2] remove per-cpu ia64_phys_stacked_size_p8
2006-02-24 2:33 [patch 1/2] remove per-cpu ia64_phys_stacked_size_p8 Chen, Kenneth W
2006-02-24 10:08 ` Christian Hildner
2006-02-24 18:58 ` Chen, Kenneth W
@ 2006-02-24 19:13 ` Chen, Kenneth W
2006-02-27 8:15 ` Christian Hildner
` (2 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Chen, Kenneth W @ 2006-02-24 19:13 UTC (permalink / raw)
To: linux-ia64
Christian Hildner wrote on Friday, February 24, 2006 2:08 AM
> self-modifing code isn't the straight forward way of programming. So
> wouldn't it be an idea to let the code crash instead of silently work
> with a potentially wrong number of registers here, if by any reason the
> patch mechanism doesn't work.
I should also *STRESS* that with all current Itanium processors
out there, including the upcoming Montecito processor, no patching will
be done by this patch set. The default value is what the
hardware has.
There are *NO* processors in existence as of today (including tomorrow, and
next month, and next year) that have a number of physical stack registers
other than 96 (which is what is encoded in the instruction right now).
- Ken
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [patch 1/2] remove per-cpu ia64_phys_stacked_size_p8
2006-02-24 2:33 [patch 1/2] remove per-cpu ia64_phys_stacked_size_p8 Chen, Kenneth W
` (2 preceding siblings ...)
2006-02-24 19:13 ` Chen, Kenneth W
@ 2006-02-27 8:15 ` Christian Hildner
2006-02-27 19:26 ` Chen, Kenneth W
2006-10-13 17:05 ` Chen, Kenneth W
5 siblings, 0 replies; 7+ messages in thread
From: Christian Hildner @ 2006-02-27 8:15 UTC (permalink / raw)
To: linux-ia64
Chen, Kenneth W schrieb:
>Christian Hildner wrote on Friday, February 24, 2006 2:08 AM
>
>
>>self-modifing code isn't the straight forward way of programming. So
>>wouldn't it be an idea to let the code crash instead of silently work
>>with a potentially wrong number of registers here, if by any reason the
>>patch mechanism doesn't work.
>>
>>
>
>This argument is very biased. A bug is a bug, regardless where the
>origin or through which mechanism. Programming error like wrongly
>initialize a value has the same severity compare to patching wrong code.
>
True. However this isn't an argument against it. You might not even
recognize that there is a bug. It might silently fail and maybe nobody
would find it for years. With the break it would fail so you would be
able to fix it immediately. And there is about no additional cost for
using the break instruction.
>This argument is equally flawed. If you don't have any trust in patching
>mechanism in previous argument, why would you trust that patch out a break
>instruction is going to be any better?
>
It is better because it would detect failure in the patch mechanism. Not
more, not less.
Christian
^ permalink raw reply [flat|nested] 7+ messages in thread
* RE: [patch 1/2] remove per-cpu ia64_phys_stacked_size_p8
2006-02-24 2:33 [patch 1/2] remove per-cpu ia64_phys_stacked_size_p8 Chen, Kenneth W
` (3 preceding siblings ...)
2006-02-27 8:15 ` Christian Hildner
@ 2006-02-27 19:26 ` Chen, Kenneth W
2006-10-13 17:05 ` Chen, Kenneth W
5 siblings, 0 replies; 7+ messages in thread
From: Chen, Kenneth W @ 2006-02-27 19:26 UTC (permalink / raw)
To: linux-ia64
Chen, Ken wrote on Thursday, February 23, 2006 6:34 PM
> It's not efficient to use a per-cpu variable just to store
> how many physical stack register a cpu has. Ever since the
> incarnation of ia64 up till upcoming Montecito processor, that
> variable has "glued" to 96. Having a variable in memory means
> that the kernel is burning an extra cacheline access on every
> syscall and kernel exit path. Such "static" value is better
> served with the instruction patching utility exists today.
> Convert ia64_phys_stacked_size_p8 into dynamic insn patching.
>
> This also has a pleasant side effect of eliminating access to
> per-cpu area while psr.ic=0 in the kernel exit path. (fixable
> for per-cpu DTC work, but why bother?)
>
> There are some concerns with the default value that the instruc-
> tion encoded in the kernel image. It shouldn't be concerned.
> The reasons are:
>
> (1) cpu_init() is called at CPU initialization. In there, we
> find out physical stack register size from PAL and patch
> two instructions in kernel exit code. The code in question
> can not be executed before the patching is done.
>
> (2) current implementation stores zero in ia64_phys_stacked_size_p8,
> and that's what the current kernel exit path loads the value with.
> With the new code, it is equivalent that we store reg size 96
> in ia64_phys_stacked_size_p8, thus creating a better safety net.
> Given (1) above can never fail, having (2) is just a bonus.
>
>
> All in all, this patch allow one less memory reference in the kernel
> exit path, thus reducing syscall and interrupt return latency; and
> avoid polluting potential useful data in the CPU cache.
I accidentally posted a variant of patch with IA64_NUM_PHYS_STACK_REG
set to 0 for the purpose of testing out correctness of instruction
patching. Change that value to 96 as advertised.
- Ken
--- linux-2.6.15/include/asm-ia64/processor.h.orig 2006-02-27 12:19:47.793181185 -0800
+++ linux-2.6.15/include/asm-ia64/processor.h 2006-02-27 12:19:56.312712330 -0800
@@ -20,7 +20,7 @@
#include <asm/ptrace.h>
#include <asm/ustack.h>
-#define IA64_NUM_PHYS_STACK_REG 0
+#define IA64_NUM_PHYS_STACK_REG 96
#define IA64_NUM_DBG_REGS 8
/*
* Limits for PMC and PMD are set to less than maximum architected values
^ permalink raw reply [flat|nested] 7+ messages in thread
* [patch 1/2] remove per-cpu ia64_phys_stacked_size_p8
2006-02-24 2:33 [patch 1/2] remove per-cpu ia64_phys_stacked_size_p8 Chen, Kenneth W
` (4 preceding siblings ...)
2006-02-27 19:26 ` Chen, Kenneth W
@ 2006-10-13 17:05 ` Chen, Kenneth W
5 siblings, 0 replies; 7+ messages in thread
From: Chen, Kenneth W @ 2006-10-13 17:05 UTC (permalink / raw)
To: linux-ia64
It's not efficient to use a per-cpu variable just to store
how many physical stack registers a cpu has. Ever since the
first incarnation of ia64 up to the upcoming Montecito processor, that
variable has been "glued" to 96. Having a variable in memory means
that the kernel is burning an extra cacheline access on every
syscall and kernel exit path. Such a "static" value is better
served by the instruction patching utility that exists today.
Convert ia64_phys_stacked_size_p8 into dynamic insn patching.
This also has a pleasant side effect of eliminating access to
per-cpu area while psr.ic=0 in the kernel exit path. (fixable
for per-cpu DTC work, but why bother?)
There are some concerns about the default value that is encoded in
the instruction in the kernel image. It shouldn't be a concern.
The reasons are:
(1) cpu_init() is called at CPU initialization. In there, we
find out physical stack register size from PAL and patch
two instructions in kernel exit code. The code in question
can not be executed before the patching is done.
(2) current implementation stores zero in ia64_phys_stacked_size_p8,
and that's what the current kernel exit path loads the value with.
With the new code, it is equivalent that we store reg size 96
in ia64_phys_stacked_size_p8, thus creating a better safety net.
Given (1) above can never fail, having (2) is just a bonus.
All in all, this patch allows one fewer memory reference in the kernel
exit path, thus reducing syscall and interrupt return latency, and
avoids displacing potentially useful data from the CPU cache.
Signed-off-by: Ken Chen <kenneth.w.chen@intel.com>
--- ./include/asm-ia64/patch.h.orig 2006-10-10 19:51:20.000000000 -0700
+++ ./include/asm-ia64/patch.h 2006-10-13 10:35:30.000000000 -0700
@@ -20,6 +20,7 @@ extern void ia64_patch_imm60 (u64 insn_a
extern void ia64_patch_mckinley_e9 (unsigned long start, unsigned long end);
extern void ia64_patch_vtop (unsigned long start, unsigned long end);
+extern void ia64_patch_phys_stack_reg(unsigned long val);
extern void ia64_patch_gate (void);
#endif /* _ASM_IA64_PATCH_H */
--- ./include/asm-ia64/sections.h.orig 2006-10-10 19:51:20.000000000 -0700
+++ ./include/asm-ia64/sections.h 2006-10-13 10:35:30.000000000 -0700
@@ -11,6 +11,7 @@
extern char __per_cpu_start[], __per_cpu_end[], __phys_per_cpu_start[];
extern char __start___vtop_patchlist[], __end___vtop_patchlist[];
extern char __start___mckinley_e9_bundles[], __end___mckinley_e9_bundles[];
+extern char __start___phys_stack_reg_patchlist[], __end___phys_stack_reg_patchlist[];
extern char __start_gate_section[];
extern char __start_gate_mckinley_e9_patchlist[], __end_gate_mckinley_e9_patchlist[];
extern char __start_gate_vtop_patchlist[], __end_gate_vtop_patchlist[];
--- ./include/asm-ia64/asmmacro.h.orig 2006-10-10 19:51:20.000000000 -0700
+++ ./include/asm-ia64/asmmacro.h 2006-10-13 10:35:30.000000000 -0700
@@ -104,6 +104,16 @@ name:
#endif
/*
+ * If physical stack register size is different from IA64_NUM_PHYS_STACK_REG,
+ * dynamically patch the kernel for correct size.
+ */
+ .section ".data.patch.phys_stack_reg", "a"
+ .previous
+#define LOAD_PHYS_STACK_REG_SIZE(reg) \
+[1:] adds reg=IA64_NUM_PHYS_STACK_REG*8+8,r0; \
+ .xdata4 ".data.patch.phys_stack_reg", 1b-.
+
+/*
* Up until early 2004, use of .align within a function caused bad unwind info.
* TEXT_ALIGN(n) expands into ".align n" if a fixed GAS is available or into nothing
* otherwise.
--- ./include/asm-ia64/processor.h.orig 2006-10-10 19:51:20.000000000 -0700
+++ ./include/asm-ia64/processor.h 2006-10-13 10:35:30.000000000 -0700
@@ -19,6 +19,7 @@
#include <asm/ptrace.h>
#include <asm/ustack.h>
+#define IA64_NUM_PHYS_STACK_REG 96
#define IA64_NUM_DBG_REGS 8
#define DEFAULT_MAP_BASE __IA64_UL_CONST(0x2000000000000000)
--- ./arch/ia64/kernel/setup.c.orig 2006-10-10 19:51:10.000000000 -0700
+++ ./arch/ia64/kernel/setup.c 2006-10-13 10:41:50.000000000 -0700
@@ -73,7 +73,6 @@ extern void ia64_setup_printk_clock(void
DEFINE_PER_CPU(struct cpuinfo_ia64, cpu_info);
DEFINE_PER_CPU(unsigned long, local_per_cpu_offset);
-DEFINE_PER_CPU(unsigned long, ia64_phys_stacked_size_p8);
unsigned long ia64_cycles_per_usec;
struct ia64_boot_param *ia64_boot_param;
struct screen_info screen_info;
@@ -775,6 +774,7 @@ void __cpuinit
cpu_init (void)
{
extern void __cpuinit ia64_mmu_init (void *);
+ static unsigned long max_num_phys_stacked = IA64_NUM_PHYS_STACK_REG;
unsigned long num_phys_stacked;
pal_vm_info_2_u_t vmi;
unsigned int max_ctx;
@@ -888,7 +888,10 @@ cpu_init (void)
num_phys_stacked = 96;
}
/* size of physical stacked register partition plus 8 bytes: */
- __get_cpu_var(ia64_phys_stacked_size_p8) = num_phys_stacked*8 + 8;
+ if (num_phys_stacked > max_num_phys_stacked) {
+ ia64_patch_phys_stack_reg(num_phys_stacked*8 + 8);
+ max_num_phys_stacked = num_phys_stacked;
+ }
platform_cpu_init();
pm_idle = default_idle;
}
--- ./arch/ia64/kernel/vmlinux.lds.S.orig 2006-10-10 19:51:10.000000000 -0700
+++ ./arch/ia64/kernel/vmlinux.lds.S 2006-10-13 10:35:30.000000000 -0700
@@ -78,6 +78,13 @@ SECTIONS
__stop___mca_table = .;
}
+ .data.patch.phys_stack_reg : AT(ADDR(.data.patch.phys_stack_reg) - LOAD_OFFSET)
+ {
+ __start___phys_stack_reg_patchlist = .;
+ *(.data.patch.phys_stack_reg)
+ __end___phys_stack_reg_patchlist = .;
+ }
+
/* Global data */
_data = .;
--- ./arch/ia64/kernel/entry.S.orig 2006-10-10 19:51:10.000000000 -0700
+++ ./arch/ia64/kernel/entry.S 2006-10-13 10:35:30.000000000 -0700
@@ -767,7 +767,7 @@ ENTRY(ia64_leave_syscall)
ld8.fill r15=[r3] // M0|1 restore r15
mov b6=r18 // I0 restore b6
- addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0 // A
+ LOAD_PHYS_STACK_REG_SIZE(r17)
mov f9=f0 // F clear f9
(pKStk) br.cond.dpnt.many skip_rbs_switch // B
@@ -775,7 +775,6 @@ ENTRY(ia64_leave_syscall)
shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition
cover // B add current frame into dirty partition & set cr.ifs
;;
-(pUStk) ld4 r17=[r17] // M0|1 r17 = cpu_data->phys_stacked_size_p8
mov r19=ar.bsp // M2 get new backing store pointer
mov f10=f0 // F clear f10
@@ -953,9 +952,7 @@ GLOBAL_ENTRY(ia64_leave_kernel)
shr.u r18=r19,16 // get byte size of existing "dirty" partition
;;
mov r16=ar.bsp // get existing backing store pointer
- addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0
- ;;
- ld4 r17=[r17] // r17 = cpu_data->phys_stacked_size_p8
+ LOAD_PHYS_STACK_REG_SIZE(r17)
(pKStk) br.cond.dpnt skip_rbs_switch
/*
--- ./arch/ia64/kernel/patch.c.orig 2006-10-10 19:51:10.000000000 -0700
+++ ./arch/ia64/kernel/patch.c 2006-10-13 10:35:30.000000000 -0700
@@ -195,3 +195,23 @@ ia64_patch_gate (void)
ia64_patch_vtop(START(vtop), END(vtop));
ia64_patch_mckinley_e9(START(mckinley_e9), END(mckinley_e9));
}
+
+void ia64_patch_phys_stack_reg(unsigned long val)
+{ /* Rewrite the immediate of every insn recorded on the phys_stack_reg patch list from val. */
+ s32 * offp = (s32 *) __start___phys_stack_reg_patchlist; /* first 32-bit list entry */
+ s32 * end = (s32 *) __end___phys_stack_reg_patchlist; /* one past the last entry */
+ u64 ip, mask, imm;
+
+ /* see instruction format A4: adds r1 = imm13, r3 (only the low 13 immediate bits are patched) */
+ mask = (0x3fUL << 27) | (0x7f << 13); /* insn bits 27-32 and 13-19 */
+ imm = (((val >> 7) & 0x3f) << 27) | (val & 0x7f) << 13; /* val[12:7] -> bits 27-32, val[6:0] -> bits 13-19 */
+
+ while (offp < end) {
+ ip = (u64) offp + *offp; /* entry is self-relative: entry address + stored offset */
+ ia64_patch(ip, mask, imm); /* presumably replaces the masked bits at ip with imm -- helper not shown here */
+ ia64_fc(ip); /* presumably flushes the cache line holding the patched insn -- TODO confirm */
+ ++offp;
+ }
+ ia64_sync_i(); /* NOTE(review): looks like the sync.i/srlz.i pair that makes the patched insns visible to fetch -- confirm */
+ ia64_srlz_i();
+}
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2006-10-13 17:05 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-02-24 2:33 [patch 1/2] remove per-cpu ia64_phys_stacked_size_p8 Chen, Kenneth W
2006-02-24 10:08 ` Christian Hildner
2006-02-24 18:58 ` Chen, Kenneth W
2006-02-24 19:13 ` Chen, Kenneth W
2006-02-27 8:15 ` Christian Hildner
2006-02-27 19:26 ` Chen, Kenneth W
2006-10-13 17:05 ` Chen, Kenneth W
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox