* [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
@ 2003-04-17 23:57 ` Jesse Barnes
2003-04-25 21:02 ` Jesse Barnes
` (57 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: Jesse Barnes @ 2003-04-17 23:57 UTC (permalink / raw)
To: linux-ia64
Ok, great. I'll post it when I have it.
Thanks,
Jesse
On Thu, Apr 17, 2003 at 04:05:29PM -0700, David Mosberger wrote:
> >>>>> On Thu, 17 Apr 2003 15:50:02 -0700, Jesse Barnes <jbarnes@sgi.com> said:
>
> Jesse> David, is something like this ok with you? On our systems,
> Jesse> elilo won't load the kernel where head.S expects it, so we
> Jesse> look at the ip to figure it out.
>
> I don't have an issue with the particular patch, but I don't like to
> take new features incrementally. At the moment, the normal ia64
> kernel doesn't support loading at arbitrary physical addresses. If
> there is a complete patch to add such a feature, we should consider it
> for inclusion.
>
> --david
^ permalink raw reply [flat|nested] 60+ messages in thread* [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
2003-04-17 23:57 ` Jesse Barnes
@ 2003-04-25 21:02 ` Jesse Barnes
2003-05-07 22:39 ` David Mosberger
` (56 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: Jesse Barnes @ 2003-04-25 21:02 UTC (permalink / raw)
To: linux-ia64
[-- Attachment #1: Type: text/plain, Size: 858 bytes --]
On Thu, Apr 17, 2003 at 04:05:29PM -0700, David Mosberger wrote:
> I don't have an issue with the particular patch, but I don't like to
> take new features incrementally. At the moment, the normal ia64
> kernel doesn't support loading at arbitrary physical addresses. If
> there is a complete patch to add such a feature, we should consider it
> for inclusion.
Chris Wedgwood reminded me that Tony posted one a while back. I wonder
what you think of his approach? I've attached it for reference since
the linux-ia64 archive only had a mangled copy. It's against 2.5.39.
If you have any other ideas about how we can address this, I'd like to
hear them. Ideally, a generic kernel would work on all ia64 platforms
at the same virtual load address (which is one problem with Tony's old
patch IMO), regardless of physical load address.
Thanks a lot,
Jesse
[-- Attachment #2: 01-config_ia64_vmap_kernel.diff --]
[-- Type: text/plain, Size: 30034 bytes --]
diff -ru ../../REF/linux-2.5.39-ia64-020928/arch/ia64/Config.help aegl/arch/ia64/Config.help
--- ../../REF/linux-2.5.39-ia64-020928/arch/ia64/Config.help Wed Oct 9 11:39:29 2002
+++ aegl/arch/ia64/Config.help Mon Oct 14 10:23:04 2002
@@ -567,3 +567,9 @@
Select "16MB" for a small granule size.
Select "64MB" for a large granule size. This is the current default.
+
+CONFIG_IA64_VMAP_KERNEL
+ If you say Y here, the kernel will be configured to map the kernel
+ to the top 4GB of the 64-bit virtual space. This is needed for
+ certain ccNUMA machines that do not guarantee the presence of
+ memory at any particular physical address.
diff -ru ../../REF/linux-2.5.39-ia64-020928/arch/ia64/config.in aegl/arch/ia64/config.in
--- ../../REF/linux-2.5.39-ia64-020928/arch/ia64/config.in Wed Oct 9 11:39:29 2002
+++ aegl/arch/ia64/config.in Mon Oct 7 10:46:30 2002
@@ -71,6 +71,8 @@
define_bool CONFIG_IOSAPIC y
fi
+bool 'Virtual mapped kernel' CONFIG_IA64_VMAP_KERNEL
+
if [ "$CONFIG_IA64_SGI_SN1" = "y" -o "$CONFIG_IA64_SGI_SN2" = "y" ]; then
define_bool CONFIG_IA64_SGI_SN y
bool ' Enable extra debugging code' CONFIG_IA64_SGI_SN_DEBUG
diff -ru ../../REF/linux-2.5.39-ia64-020928/arch/ia64/kernel/efi_stub.S aegl/arch/ia64/kernel/efi_stub.S
--- ../../REF/linux-2.5.39-ia64-020928/arch/ia64/kernel/efi_stub.S Fri Sep 27 14:48:34 2002
+++ aegl/arch/ia64/kernel/efi_stub.S Wed Oct 9 09:48:06 2002
@@ -62,7 +62,7 @@
mov b6=r2
;;
andcm r16=loc3,r16 // get psr with IT, DT, and RT bits cleared
- br.call.sptk.many rp=ia64_switch_mode
+ br.call.sptk.many rp=ia64_switch_mode_phys
.ret0: mov out4=in5
mov out0=in1
mov out1=in2
@@ -73,7 +73,7 @@
br.call.sptk.many rp=b6 // call the EFI function
.ret1: mov ar.rsc=0 // put RSE in enforced lazy, LE mode
mov r16=loc3
- br.call.sptk.many rp=ia64_switch_mode // return to virtual mode
+ br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
.ret2: mov ar.rsc=loc4 // restore RSE configuration
mov ar.pfs=loc1
mov rp=loc0
diff -ru ../../REF/linux-2.5.39-ia64-020928/arch/ia64/kernel/entry.S aegl/arch/ia64/kernel/entry.S
--- ../../REF/linux-2.5.39-ia64-020928/arch/ia64/kernel/entry.S Fri Sep 27 14:49:16 2002
+++ aegl/arch/ia64/kernel/entry.S Wed Oct 9 16:23:58 2002
@@ -175,6 +175,14 @@
;;
st8 [r22]=sp // save kernel stack pointer of old task
shr.u r26=r20,IA64_GRANULE_SHIFT
+#ifdef CONFIG_IA64_VMAP_KERNEL
+ adds r21=IA64_TASK_THREAD_KSP_OFFSET,in0
+ ;;
+ /*
+ * If we've already mapped this task's page, we can skip doing it again.
+ */
+ cmp.eq p7,p6=r26,r27
+#else
shr.u r17=r20,KERNEL_TR_PAGE_SHIFT
;;
cmp.ne p6,p7=KERNEL_TR_PAGE_NUM,r17
@@ -184,6 +192,7 @@
* If we've already mapped this task's page, we can skip doing it again.
*/
(p6) cmp.eq p7,p6=r26,r27
+#endif
(p6) br.cond.dpnt .map
;;
.done:
diff -ru ../../REF/linux-2.5.39-ia64-020928/arch/ia64/kernel/head.S aegl/arch/ia64/kernel/head.S
--- ../../REF/linux-2.5.39-ia64-020928/arch/ia64/kernel/head.S Fri Sep 27 14:50:22 2002
+++ aegl/arch/ia64/kernel/head.S Thu Oct 10 10:47:22 2002
@@ -75,7 +75,16 @@
mov cr.itir=r18
mov cr.ifa=r17
mov r16=IA64_TR_KERNEL
+#ifdef CONFIG_IA64_VMAP_KERNEL
+ mov r3=ip
+ movl r18=PAGE_KERNEL
+ ;;
+ dep r2=0,r3,0,KERNEL_TR_PAGE_SHIFT
+ ;;
+ or r18=r2,r18
+#else
movl r18=((1 << KERNEL_TR_PAGE_SHIFT) | PAGE_KERNEL)
+#endif
;;
srlz.i
;;
@@ -144,15 +153,43 @@
cmp.eq isBP,isAP=r0,r0
#endif
;;
+#ifdef CONFIG_IA64_VMAP_KERNEL
+ tpa r3=r2 // r3 == phys addr of task struct
+ // load mapping for stack (virtaddr in r2, physaddr in r3)
+ rsm psr.ic
+ movl r17=PAGE_KERNEL
+ ;;
+ srlz.d
+ dep r18=0,r3,0,12
+ ;;
+ or r18=r17,r18
+ dep r2=-1,r3,61,3 // IMVA of task
+ ;;
+ mov r17=rr[r2]
+ shr.u r16=r3,IA64_GRANULE_SHIFT
+ ;;
+ dep r17=0,r17,8,24
+ ;;
+ mov cr.itir=r17
+ mov cr.ifa=r2
+
+ mov r19=IA64_TR_CURRENT_STACK
+ ;;
+ itr.d dtr[r19]=r18
+ ;;
+ ssm psr.ic
+ srlz.d
+#else
extr r3=r2,0,61 // r3 == phys addr of task struct
mov r16=KERNEL_TR_PAGE_NUM
;;
+#endif
// load the "current" pointer (r13) and ar.k6 with the current task
mov r13=r2
mov IA64_KR(CURRENT)=r3 // Physical address
- // initialize k4 to a safe value (64-128MB is mapped by TR_KERNEL)
+ // initialize k4 to granulized page number of stack
mov IA64_KR(CURRENT_STACK)=r16
/*
* Reserve space at the top of the stack for "struct pt_regs". Kernel threads
@@ -668,14 +705,14 @@
END(__ia64_init_fpu)
/*
- * Switch execution mode from virtual to physical or vice versa.
+ * Switch execution mode from virtual to physical
*
* Inputs:
* r16 = new psr to establish
*
* Note: RSE must already be in enforced lazy mode
*/
-GLOBAL_ENTRY(ia64_switch_mode)
+GLOBAL_ENTRY(ia64_switch_mode_phys)
{
alloc r2=ar.pfs,0,0,0,0
rsm psr.i | psr.ic // disable interrupts and interrupt collection
@@ -685,35 +722,86 @@
{
flushrs // must be first insn in group
srlz.i
- shr.u r19=r15,61 // r19 <- top 3 bits of current IP
}
;;
mov cr.ipsr=r16 // set new PSR
- add r3=1f-ia64_switch_mode,r15
- xor r15=0x7,r19 // flip the region bits
+ add r3=1f-ia64_switch_mode_phys,r15
mov r17=ar.bsp
mov r14=rp // get return address into a general register
+ ;;
- // switch RSE backing store:
+ // going to physical mode, use tpa to translate virt->phys
+ tpa r17=r17
+ tpa r3=r3
+ tpa sp=sp
+ tpa r14=r14
;;
- dep r17=r15,r17,61,3 // make ar.bsp physical or virtual
+
mov r18=ar.rnat // save ar.rnat
- ;;
mov ar.bspstore=r17 // this steps on ar.rnat
- dep r3=r15,r3,61,3 // make rfi return address physical or virtual
+ mov cr.iip=r3
+ mov cr.ifs=r0
+ ;;
+ mov ar.rnat=r18 // restore ar.rnat
+ rfi // must be last insn in group
+ ;;
+1: mov rp=r14
+ br.ret.sptk.many rp
+END(ia64_switch_mode_phys)
+
+/*
+ * Switch execution mode from physical to virtual
+ *
+ * Inputs:
+ * r16 = new psr to establish
+ *
+ * Note: RSE must already be in enforced lazy mode
+ */
+GLOBAL_ENTRY(ia64_switch_mode_virt)
+ {
+ alloc r2=ar.pfs,0,0,0,0
+ rsm psr.i | psr.ic // disable interrupts and interrupt collection
+ mov r15=ip
+ }
+ ;;
+ {
+ flushrs // must be first insn in group
+ srlz.i
+ }
+ ;;
+ mov cr.ipsr=r16 // set new PSR
+ add r3=1f-ia64_switch_mode_virt,r15
+
+ mov r17=ar.bsp
+ mov r14=rp // get return address into a general register
+ ;;
+
+ // going to virtual
+ // - for code addresses, set upper bits of addr to KERNEL_START
+ // - for stack addresses, set upper 3 bits to 0xe.... Dont change any of the
+ // lower bits since we want it to stay identity mapped
+ movl r18=KERNEL_START
+ dep r3=0,r3,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT
+ dep r14=0,r14,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT
+ dep r17=-1,r17,61,3
+ dep sp=-1,sp,61,3
+ ;;
+ or r3=r3,r18
+ or r14=r14,r18
;;
+
+ mov r18=ar.rnat // save ar.rnat
+ mov ar.bspstore=r17 // this steps on ar.rnat
mov cr.iip=r3
mov cr.ifs=r0
- dep sp=r15,sp,61,3 // make stack pointer physical or virtual
;;
mov ar.rnat=r18 // restore ar.rnat
- dep r14=r15,r14,61,3 // make function return address physical or virtual
rfi // must be last insn in group
;;
1: mov rp=r14
br.ret.sptk.many rp
-END(ia64_switch_mode)
+END(ia64_switch_mode_virt)
#ifdef CONFIG_IA64_BRL_EMU
diff -ru ../../REF/linux-2.5.39-ia64-020928/arch/ia64/kernel/ia64_ksyms.c aegl/arch/ia64/kernel/ia64_ksyms.c
--- ../../REF/linux-2.5.39-ia64-020928/arch/ia64/kernel/ia64_ksyms.c Fri Sep 27 14:49:53 2002
+++ aegl/arch/ia64/kernel/ia64_ksyms.c Wed Oct 9 13:43:37 2002
@@ -143,3 +143,6 @@
#endif
EXPORT_SYMBOL(machvec_noop);
+#ifdef CONFIG_IA64_VMAP_KERNEL
+EXPORT_SYMBOL(zero_page_memmap_ptr);
+#endif
diff -ru ../../REF/linux-2.5.39-ia64-020928/arch/ia64/kernel/ivt.S aegl/arch/ia64/kernel/ivt.S
--- ../../REF/linux-2.5.39-ia64-020928/arch/ia64/kernel/ivt.S Fri Sep 27 14:49:08 2002
+++ aegl/arch/ia64/kernel/ivt.S Wed Oct 9 10:17:58 2002
@@ -122,8 +122,18 @@
shr.u r18=r22,PGDIR_SHIFT // get bits 33-63 of the faulting address
;;
(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place
+#ifdef CONFIG_IA64_VMAP_KERNEL
+ .global ia64_ivt_patch1
+ia64_ivt_patch1:
+{ .mlx // we patch this bundle to include physical address of swapper_pg_dir
+ srlz.d // ensure "rsm psr.dt" has taken effect
+(p6) movl r19=swapper_pg_dir // region 5 is rooted at swapper_pg_dir
+}
+ .pred.rel "mutex", p6, p7
+#else
srlz.d // ensure "rsm psr.dt" has taken effect
(p6) movl r19=__pa(swapper_pg_dir) // region 5 is rooted at swapper_pg_dir
+#endif
(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT
(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3
;;
@@ -415,8 +425,18 @@
shr.u r18=r16,PGDIR_SHIFT // get bits 33-63 of faulting address
;;
(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place
+#ifdef CONFIG_IA64_VMAP_KERNEL
+ .global ia64_ivt_patch2
+ia64_ivt_patch2:
+{ .mlx // we patch this bundle to include physical address of swapper_pg_dir
+ srlz.d // ensure "rsm psr.dt" has taken effect
+(p6) movl r19=swapper_pg_dir // region 5 is rooted at swapper_pg_dir
+}
+#else
srlz.d
(p6) movl r19=__pa(swapper_pg_dir) // region 5 is rooted at swapper_pg_dir
+#endif
+ .pred.rel "mutex", p6, p7
(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT
(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3
;;
diff -ru ../../REF/linux-2.5.39-ia64-020928/arch/ia64/kernel/mca.c aegl/arch/ia64/kernel/mca.c
--- ../../REF/linux-2.5.39-ia64-020928/arch/ia64/kernel/mca.c Wed Oct 9 11:39:29 2002
+++ aegl/arch/ia64/kernel/mca.c Wed Oct 9 10:26:19 2002
@@ -434,17 +434,17 @@
IA64_MCA_DEBUG("ia64_mca_init: registered mca rendezvous spinloop and wakeup mech.\n");
- ia64_mc_info.imi_mca_handler = __pa(mca_hldlr_ptr->fp);
+ ia64_mc_info.imi_mca_handler = __tpa(mca_hldlr_ptr->fp);
/*
* XXX - disable SAL checksum by setting size to 0; should be
- * __pa(ia64_os_mca_dispatch_end) - __pa(ia64_os_mca_dispatch);
+ * __tpa(ia64_os_mca_dispatch_end) - __tpa(ia64_os_mca_dispatch);
*/
ia64_mc_info.imi_mca_handler_size = 0;
/* Register the os mca handler with SAL */
if ((rc = ia64_sal_set_vectors(SAL_VECTOR_OS_MCA,
ia64_mc_info.imi_mca_handler,
- mca_hldlr_ptr->gp,
+ __tpa(mca_hldlr_ptr->gp),
ia64_mc_info.imi_mca_handler_size,
0, 0, 0)))
{
@@ -454,15 +454,15 @@
}
IA64_MCA_DEBUG("ia64_mca_init: registered os mca handler with SAL at 0x%lx, gp = 0x%lx\n",
- ia64_mc_info.imi_mca_handler, mca_hldlr_ptr->gp);
+ ia64_mc_info.imi_mca_handler, __tpa(mca_hldlr_ptr->gp));
/*
* XXX - disable SAL checksum by setting size to 0, should be
* IA64_INIT_HANDLER_SIZE
*/
- ia64_mc_info.imi_monarch_init_handler = __pa(mon_init_ptr->fp);
+ ia64_mc_info.imi_monarch_init_handler = __tpa(mon_init_ptr->fp);
ia64_mc_info.imi_monarch_init_handler_size = 0;
- ia64_mc_info.imi_slave_init_handler = __pa(slave_init_ptr->fp);
+ ia64_mc_info.imi_slave_init_handler = __tpa(slave_init_ptr->fp);
ia64_mc_info.imi_slave_init_handler_size = 0;
IA64_MCA_DEBUG("ia64_mca_init: os init handler at %lx\n",
@@ -471,10 +471,10 @@
/* Register the os init handler with SAL */
if ((rc = ia64_sal_set_vectors(SAL_VECTOR_OS_INIT,
ia64_mc_info.imi_monarch_init_handler,
- __pa(ia64_get_gp()),
+ __tpa(ia64_get_gp()),
ia64_mc_info.imi_monarch_init_handler_size,
ia64_mc_info.imi_slave_init_handler,
- __pa(ia64_get_gp()),
+ __tpa(ia64_get_gp()),
ia64_mc_info.imi_slave_init_handler_size)))
{
printk("ia64_mca_init: Failed to register m/s init handlers with SAL. rc = %ld\n",
diff -ru ../../REF/linux-2.5.39-ia64-020928/arch/ia64/kernel/pal.S aegl/arch/ia64/kernel/pal.S
--- ../../REF/linux-2.5.39-ia64-020928/arch/ia64/kernel/pal.S Fri Sep 27 14:50:57 2002
+++ aegl/arch/ia64/kernel/pal.S Wed Oct 9 10:38:14 2002
@@ -164,7 +164,11 @@
;;
mov loc4=ar.rsc // save RSE configuration
dep.z loc2=loc2,0,61 // convert pal entry point to physical
+#ifdef CONFIG_IA64_VMAP_KERNEL
+ tpa r8=r8 // convert rp to physical
+#else
dep.z r8=r8,0,61 // convert rp to physical
+#endif
;;
mov b7 = loc2 // install target to branch reg
mov ar.rsc=0 // put RSE in enforced lazy, LE mode
@@ -174,13 +178,13 @@
or loc3=loc3,r17 // add in psr the bits to set
;;
andcm r16=loc3,r16 // removes bits to clear from psr
- br.call.sptk.many rp=ia64_switch_mode
+ br.call.sptk.many rp=ia64_switch_mode_phys
.ret1: mov rp = r8 // install return address (physical)
br.cond.sptk.many b7
1:
mov ar.rsc=0 // put RSE in enforced lazy, LE mode
mov r16=loc3 // r16= original psr
- br.call.sptk.many rp=ia64_switch_mode // return to virtual mode
+ br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
.ret2:
mov psr.l = loc3 // restore init PSR
@@ -228,13 +232,13 @@
mov b7 = loc2 // install target to branch reg
;;
andcm r16=loc3,r16 // removes bits to clear from psr
- br.call.sptk.many rp=ia64_switch_mode
+ br.call.sptk.many rp=ia64_switch_mode_phys
.ret6:
br.call.sptk.many rp=b7 // now make the call
.ret7:
mov ar.rsc=0 // put RSE in enforced lazy, LE mode
mov r16=loc3 // r16= original psr
- br.call.sptk.many rp=ia64_switch_mode // return to virtual mode
+ br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
.ret8: mov psr.l = loc3 // restore init PSR
mov ar.pfs = loc1
diff -ru ../../REF/linux-2.5.39-ia64-020928/arch/ia64/kernel/setup.c aegl/arch/ia64/kernel/setup.c
--- ../../REF/linux-2.5.39-ia64-020928/arch/ia64/kernel/setup.c Fri Sep 27 14:49:06 2002
+++ aegl/arch/ia64/kernel/setup.c Tue Oct 15 13:58:00 2002
@@ -231,8 +231,8 @@
+ strlen(__va(ia64_boot_param->command_line)) + 1);
n++;
- rsvd_region[n].start = KERNEL_START;
- rsvd_region[n].end = KERNEL_END;
+ rsvd_region[n].start = __imva(KERNEL_START);
+ rsvd_region[n].end = __imva(KERNEL_END);
n++;
#ifdef CONFIG_BLK_DEV_INITRD
@@ -282,6 +282,51 @@
#endif
}
+#ifdef CONFIG_IA64_VMAP_KERNEL
+/*
+ * There are two places in the performance critical path of
+ * the exception handling code where we need to know the physical
+ * address of the swapper_pg_dir structure. This routine
+ * patches the "movl" instructions to load the value needed.
+ */
+static void __init
+patch_ivt_with_phys_swapper_pg_dir(void)
+{
+ extern char ia64_ivt_patch1[], ia64_ivt_patch2[];
+ unsigned long spd = __tpa(swapper_pg_dir);
+ unsigned long *p;
+
+ p = (unsigned long *)__imva(ia64_ivt_patch1);
+
+ *p = (*p & 0x3fffffffffffUL) |
+ ((spd & 0x000000ffffc00000UL)<<24);
+ p++;
+ *p = (*p & 0xf000080fff800000UL) |
+ ((spd & 0x8000000000000000UL) >> 4) |
+ ((spd & 0x7fffff0000000000UL) >> 40) |
+ ((spd & 0x00000000001f0000UL) << 29) |
+ ((spd & 0x0000000000200000UL) << 23) |
+ ((spd & 0x000000000000ff80UL) << 43) |
+ ((spd & 0x000000000000007fUL) << 36);
+
+ p = (unsigned long *)__imva(ia64_ivt_patch2);
+
+ *p = (*p & 0x3fffffffffffUL) |
+ ((spd & 0x000000ffffc00000UL)<<24);
+ p++;
+ *p = (*p & 0xf000080fff800000UL) |
+ ((spd & 0x8000000000000000UL) >> 4) |
+ ((spd & 0x7fffff0000000000UL) >> 40) |
+ ((spd & 0x00000000001f0000UL) << 29) |
+ ((spd & 0x0000000000200000UL) << 23) |
+ ((spd & 0x000000000000ff80UL) << 43) |
+ ((spd & 0x000000000000007fUL) << 36);
+}
+#define PATCH_IVT() patch_ivt_with_phys_swapper_pg_dir()
+#else
+#define PATCH_IVT()
+#endif
+
void __init
setup_arch (char **cmdline_p)
{
@@ -290,6 +335,8 @@
unw_init();
+ PATCH_IVT();
+
*cmdline_p = __va(ia64_boot_param->command_line);
strncpy(saved_command_line, *cmdline_p, sizeof(saved_command_line));
saved_command_line[COMMAND_LINE_SIZE-1] = '\0'; /* for safety */
diff -ru ../../REF/linux-2.5.39-ia64-020928/arch/ia64/kernel/smpboot.c aegl/arch/ia64/kernel/smpboot.c
--- ../../REF/linux-2.5.39-ia64-020928/arch/ia64/kernel/smpboot.c Fri Sep 27 14:49:16 2002
+++ aegl/arch/ia64/kernel/smpboot.c Wed Oct 9 10:58:39 2002
@@ -522,7 +522,7 @@
/* Tell SAL where to drop the AP's. */
ap_startup = (struct fptr *) start_ap;
sal_ret = ia64_sal_set_vectors(SAL_VECTOR_OS_BOOT_RENDEZ,
- __pa(ap_startup->fp), __pa(ap_startup->gp), 0, 0, 0, 0);
+ __tpa(ap_startup->fp), __tpa(ap_startup->gp), 0, 0, 0, 0);
if (sal_ret < 0)
printk("SMP: Can't set SAL AP Boot Rendezvous: %s\n", ia64_sal_strerror(sal_ret));
}
diff -ru ../../REF/linux-2.5.39-ia64-020928/arch/ia64/mm/init.c aegl/arch/ia64/mm/init.c
--- ../../REF/linux-2.5.39-ia64-020928/arch/ia64/mm/init.c Wed Oct 9 11:39:29 2002
+++ aegl/arch/ia64/mm/init.c Tue Oct 15 13:59:05 2002
@@ -39,6 +39,10 @@
static int pgt_cache_water[2] = { 25, 50 };
+#ifdef CONFIG_IA64_VMAP_KERNEL
+struct page *zero_page_memmap_ptr; /* map entry for zero page */
+#endif
+
void
check_pgt_cache (void)
{
@@ -104,14 +108,16 @@
void
free_initmem (void)
{
- unsigned long addr;
+ unsigned long addr, eaddr;
- addr = (unsigned long) &__init_begin;
- for (; addr < (unsigned long) &__init_end; addr += PAGE_SIZE) {
+ addr = (unsigned long)__imva(&__init_begin);
+ eaddr = (unsigned long)__imva(&__init_end);
+ while (addr < eaddr) {
ClearPageReserved(virt_to_page(addr));
set_page_count(virt_to_page(addr), 1);
free_page(addr);
++totalram_pages;
+ addr += PAGE_SIZE;
}
printk(KERN_INFO "Freeing unused kernel memory: %ldkB freed\n",
(&__init_end - &__init_begin) >> 10);
@@ -286,7 +292,7 @@
ia64_srlz_d();
ia64_itr(0x2, IA64_TR_PERCPU_DATA, PERCPU_ADDR,
- pte_val(pfn_pte(__pa(my_cpu_data) >> PAGE_SHIFT, PAGE_KERNEL)), PAGE_SHIFT);
+ pte_val(pfn_pte(__tpa(my_cpu_data) >> PAGE_SHIFT, PAGE_KERNEL)), PAGE_SHIFT);
ia64_set_psr(psr);
ia64_srlz_i();
@@ -364,6 +370,9 @@
zones_size[ZONE_NORMAL] = max_low_pfn - max_dma;
}
free_area_init(zones_size);
+#ifdef CONFIG_IA64_VMAP_KERNEL
+ zero_page_memmap_ptr = virt_to_page(__imva(empty_zero_page));
+#endif
}
static int
@@ -442,7 +451,7 @@
pgt_cache_water[1] = num_pgt_pages;
/* install the gate page in the global page table: */
- put_gate_page(virt_to_page(__start_gate_section), GATE_ADDR);
+ put_gate_page(virt_to_page(__imva(__start_gate_section)), GATE_ADDR);
#ifdef CONFIG_IA32_SUPPORT
ia32_gdt_init();
diff -ru ../../REF/linux-2.5.39-ia64-020928/arch/ia64/vmlinux.lds.S aegl/arch/ia64/vmlinux.lds.S
--- ../../REF/linux-2.5.39-ia64-020928/arch/ia64/vmlinux.lds.S Wed Oct 9 11:39:29 2002
+++ aegl/arch/ia64/vmlinux.lds.S Mon Oct 7 17:12:16 2002
@@ -3,6 +3,12 @@
#include <asm/cache.h>
#include <asm/ptrace.h>
#include <asm/system.h>
+#ifdef CONFIG_IA64_VMAP_KERNEL
+#include <asm/pgtable.h>
+#define BASE_KVADDR KERNEL_START + KERNEL_TR_PAGE_SIZE
+#else
+#define BASE_KVADDR PAGE_OFFSET
+#endif
OUTPUT_FORMAT("elf64-ia64-little")
OUTPUT_ARCH(ia64)
@@ -20,21 +26,21 @@
}
v = PAGE_OFFSET; /* this symbol is here to make debugging easier... */
- phys_start = _start - PAGE_OFFSET;
+ phys_start = _start - BASE_KVADDR;
. = KERNEL_START;
_text = .;
_stext = .;
- .text : AT(ADDR(.text) - PAGE_OFFSET)
+ .text : AT(ADDR(.text) - BASE_KVADDR)
{
*(.text.ivt)
*(.text)
}
- .text2 : AT(ADDR(.text2) - PAGE_OFFSET)
+ .text2 : AT(ADDR(.text2) - BASE_KVADDR)
{ *(.text2) }
#ifdef CONFIG_SMP
- .text.lock : AT(ADDR(.text.lock) - PAGE_OFFSET)
+ .text.lock : AT(ADDR(.text.lock) - BASE_KVADDR)
{ *(.text.lock) }
#endif
_etext = .;
@@ -47,7 +53,7 @@
/* Exception table */
. = ALIGN(16);
__start___ex_table = .;
- __ex_table : AT(ADDR(__ex_table) - PAGE_OFFSET)
+ __ex_table : AT(ADDR(__ex_table) - BASE_KVADDR)
{ *(__ex_table) }
__stop___ex_table = .;
@@ -55,48 +61,48 @@
/* Machine Vector */
. = ALIGN(16);
machvec_start = .;
- .machvec : AT(ADDR(.machvec) - PAGE_OFFSET)
+ .machvec : AT(ADDR(.machvec) - BASE_KVADDR)
{ *(.machvec) }
machvec_end = .;
#endif
__start___ksymtab = .; /* Kernel symbol table */
- __ksymtab : AT(ADDR(__ksymtab) - PAGE_OFFSET)
+ __ksymtab : AT(ADDR(__ksymtab) - BASE_KVADDR)
{ *(__ksymtab) }
__stop___ksymtab = .;
/* Unwind info & table: */
. = ALIGN(8);
- .IA_64.unwind_info : AT(ADDR(.IA_64.unwind_info) - PAGE_OFFSET)
+ .IA_64.unwind_info : AT(ADDR(.IA_64.unwind_info) - BASE_KVADDR)
{ *(.IA_64.unwind_info*) }
ia64_unw_start = .;
- .IA_64.unwind : AT(ADDR(.IA_64.unwind) - PAGE_OFFSET)
+ .IA_64.unwind : AT(ADDR(.IA_64.unwind) - BASE_KVADDR)
{ *(.IA_64.unwind*) }
ia64_unw_end = .;
- .rodata : AT(ADDR(.rodata) - PAGE_OFFSET)
+ .rodata : AT(ADDR(.rodata) - BASE_KVADDR)
{ *(.rodata) *(.rodata.*) }
- .kstrtab : AT(ADDR(.kstrtab) - PAGE_OFFSET)
+ .kstrtab : AT(ADDR(.kstrtab) - BASE_KVADDR)
{ *(.kstrtab) }
- .opd : AT(ADDR(.opd) - PAGE_OFFSET)
+ .opd : AT(ADDR(.opd) - BASE_KVADDR)
{ *(.opd) }
/* Initialization code and data: */
. = ALIGN(PAGE_SIZE);
__init_begin = .;
- .text.init : AT(ADDR(.text.init) - PAGE_OFFSET)
+ .text.init : AT(ADDR(.text.init) - BASE_KVADDR)
{ *(.text.init) }
- .data.init : AT(ADDR(.data.init) - PAGE_OFFSET)
+ .data.init : AT(ADDR(.data.init) - BASE_KVADDR)
{ *(.data.init) }
. = ALIGN(16);
__setup_start = .;
- .setup.init : AT(ADDR(.setup.init) - PAGE_OFFSET)
+ .setup.init : AT(ADDR(.setup.init) - BASE_KVADDR)
{ *(.setup.init) }
__setup_end = .;
__initcall_start = .;
- .initcall.init : AT(ADDR(.initcall.init) - PAGE_OFFSET)
+ .initcall.init : AT(ADDR(.initcall.init) - BASE_KVADDR)
{
*(.initcall1.init)
*(.initcall2.init)
@@ -111,10 +117,10 @@
__init_end = .;
/* The initial task and kernel stack */
- .data.init_task : AT(ADDR(.data.init_task) - PAGE_OFFSET)
+ .data.init_task : AT(ADDR(.data.init_task) - BASE_KVADDR)
{ *(.data.init_task) }
- .data.page_aligned : AT(ADDR(.data.page_aligned) - PAGE_OFFSET)
+ .data.page_aligned : AT(ADDR(.data.page_aligned) - BASE_KVADDR)
{ *(__special_page_section)
__start_gate_section = .;
*(.text.gate)
@@ -122,17 +128,17 @@
}
. = ALIGN(SMP_CACHE_BYTES);
- .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - PAGE_OFFSET)
+ .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - BASE_KVADDR)
{ *(.data.cacheline_aligned) }
/* Kernel symbol names for modules: */
- .kstrtab : AT(ADDR(.kstrtab) - PAGE_OFFSET)
+ .kstrtab : AT(ADDR(.kstrtab) - BASE_KVADDR)
{ *(.kstrtab) }
/* Per-cpu data: */
. = ALIGN(PAGE_SIZE);
__phys_per_cpu_start = .;
- .data.percpu PERCPU_ADDR : AT(__phys_per_cpu_start - PAGE_OFFSET)
+ .data.percpu PERCPU_ADDR : AT(__phys_per_cpu_start - BASE_KVADDR)
{
__per_cpu_start = .;
*(.data.percpu)
@@ -140,28 +146,28 @@
}
. = __phys_per_cpu_start + 4096; /* ensure percpu fits into smallest page size (4KB) */
- .data : AT(ADDR(.data) - PAGE_OFFSET)
+ .data : AT(ADDR(.data) - BASE_KVADDR)
{ *(.data) *(.gnu.linkonce.d*) CONSTRUCTORS }
. = ALIGN(16);
__gp = . + 0x200000; /* gp must be 16-byte aligned for exc. table */
- .got : AT(ADDR(.got) - PAGE_OFFSET)
+ .got : AT(ADDR(.got) - BASE_KVADDR)
{ *(.got.plt) *(.got) }
/* We want the small data sections together, so single-instruction offsets
can access them all, and initialized data all before uninitialized, so
we can shorten the on-disk segment size. */
- .sdata : AT(ADDR(.sdata) - PAGE_OFFSET)
+ .sdata : AT(ADDR(.sdata) - BASE_KVADDR)
{ *(.sdata) }
_edata = .;
_bss = .;
- .sbss : AT(ADDR(.sbss) - PAGE_OFFSET)
+ .sbss : AT(ADDR(.sbss) - BASE_KVADDR)
{ *(.sbss) *(.scommon) }
- .bss : AT(ADDR(.bss) - PAGE_OFFSET)
+ .bss : AT(ADDR(.bss) - BASE_KVADDR)
{ *(.bss) *(COMMON) }
/* XXX Must this come last to avoid shifting other symbols? --davidm */
- __kallsyms : AT(ADDR(__kallsyms) - PAGE_OFFSET)
+ __kallsyms : AT(ADDR(__kallsyms) - BASE_KVADDR)
{
__start___kallsyms = .; /* All kernel symbols */
*(__kallsyms)
diff -ru ../../REF/linux-2.5.39-ia64-020928/fs/proc/kcore.c aegl/fs/proc/kcore.c
--- ../../REF/linux-2.5.39-ia64-020928/fs/proc/kcore.c Fri Sep 27 14:48:35 2002
+++ aegl/fs/proc/kcore.c Tue Oct 15 13:03:44 2002
@@ -99,6 +99,12 @@
}
#else /* CONFIG_KCORE_AOUT */
+#if VMALLOC_START < PAGE_OFFSET
+#define KCORE_BASE VMALLOC_START
+#else
+#define KCORE_BASE PAGE_OFFSET
+#endif
+
#define roundup(x, y) ((((x)+((y)-1))/(y))*(y))
/* An ELF note in memory */
@@ -111,6 +117,12 @@
};
extern char saved_command_line[];
+#ifdef CONFIG_IA64_VMAP_KERNEL
+extern char _stext[], _end[];
+#define NPHDR 3
+#else
+#define NPHDR 2
+#endif
static size_t get_kcore_size(int *num_vma, size_t *elf_buflen)
{
@@ -118,7 +130,11 @@
struct vm_struct *m;
*num_vma = 0;
- size = ((size_t)high_memory - PAGE_OFFSET + PAGE_SIZE);
+ size = ((size_t)high_memory - KCORE_BASE + PAGE_SIZE);
+#ifdef CONFIG_IA64_VMAP_KERNEL
+ if ((size_t)_end > KCORE_BASE + size)
+ size = (size_t)_end - KCORE_BASE;
+#endif
if (!vmlist) {
*elf_buflen = PAGE_SIZE;
return (size);
@@ -126,15 +142,15 @@
for (m=vmlist; m; m=m->next) {
try = (size_t)m->addr + m->size;
- if (try > size)
- size = try;
+ if (try > KCORE_BASE + size)
+ size = try - KCORE_BASE;
*num_vma = *num_vma + 1;
}
*elf_buflen = sizeof(struct elfhdr) +
- (*num_vma + 2)*sizeof(struct elf_phdr) +
+ (*num_vma + NPHDR)*sizeof(struct elf_phdr) +
3 * sizeof(struct memelfnote);
*elf_buflen = PAGE_ALIGN(*elf_buflen);
- return (size - PAGE_OFFSET + *elf_buflen);
+ return size + *elf_buflen;
}
@@ -237,12 +253,26 @@
offset += sizeof(struct elf_phdr);
phdr->p_type = PT_LOAD;
phdr->p_flags = PF_R|PF_W|PF_X;
- phdr->p_offset = dataoff;
+ phdr->p_offset = PAGE_OFFSET - KCORE_BASE + dataoff;
phdr->p_vaddr = PAGE_OFFSET;
phdr->p_paddr = __pa(PAGE_OFFSET);
phdr->p_filesz = phdr->p_memsz = ((unsigned long)high_memory - PAGE_OFFSET);
phdr->p_align = PAGE_SIZE;
+#ifdef CONFIG_IA64_VMAP_KERNEL
+ /* setup ELF PT_LOAD program header for kernel */
+ phdr = (struct elf_phdr *) bufp;
+ bufp += sizeof(struct elf_phdr);
+ offset += sizeof(struct elf_phdr);
+ phdr->p_type = PT_LOAD;
+ phdr->p_flags = PF_R|PF_W|PF_X;
+ phdr->p_offset = (unsigned long)_stext - KCORE_BASE + dataoff;
+ phdr->p_vaddr = (unsigned long)_stext;
+ phdr->p_paddr = __tpa(_stext);
+ phdr->p_filesz = phdr->p_memsz = _end - _stext;
+ phdr->p_align = PAGE_SIZE;
+#endif
+
/* setup ELF PT_LOAD program header for every vmalloc'd area */
for (m=vmlist; m; m=m->next) {
if (m->flags & VM_IOREMAP) /* don't dump ioremap'd stuff! (TA) */
@@ -254,7 +284,7 @@
phdr->p_type = PT_LOAD;
phdr->p_flags = PF_R|PF_W|PF_X;
- phdr->p_offset = (size_t)m->addr - PAGE_OFFSET + dataoff;
+ phdr->p_offset = (size_t)m->addr - KCORE_BASE + dataoff;
phdr->p_vaddr = (size_t)m->addr;
phdr->p_paddr = __pa(m->addr);
phdr->p_filesz = phdr->p_memsz = m->size;
@@ -385,9 +415,9 @@
/*
* Fill the remainder of the buffer from kernel VM space.
* We said in the ELF header that the data which starts
- * at 'elf_buflen' is virtual address PAGE_OFFSET. --rmk
+ * at 'elf_buflen' is virtual address KCORE_BASE. --rmk
*/
- start = PAGE_OFFSET + (*fpos - elf_buflen);
+ start = KCORE_BASE + (*fpos - elf_buflen);
if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen)
tsz = buflen;
@@ -446,6 +476,17 @@
if (clear_user(buffer, tsz))
return -EFAULT;
}
+#ifdef CONFIG_IA64_VMAP_KERNEL
+ } else if ((start > (unsigned long)_stext) && (start <
+ (unsigned long)_end)) {
+ if (kern_addr_valid(start)) {
+ if (copy_to_user(buffer, (char *)start, tsz))
+ return -EFAULT;
+ } else {
+ if (clear_user(buffer, tsz))
+ return -EFAULT;
+ }
+#endif
} else {
if (clear_user(buffer, tsz))
return -EFAULT;
diff -ru ../../REF/linux-2.5.39-ia64-020928/include/asm-ia64/page.h aegl/include/asm-ia64/page.h
--- ../../REF/linux-2.5.39-ia64-020928/include/asm-ia64/page.h Fri Sep 27 14:49:06 2002
+++ aegl/include/asm-ia64/page.h Tue Oct 8 17:49:48 2002
@@ -106,6 +106,13 @@
*/
#define __pa(x) ({ia64_va _v; _v.l = (long) (x); _v.f.reg = 0; _v.l;})
#define __va(x) ({ia64_va _v; _v.l = (long) (x); _v.f.reg = -1; _v.p;})
+#ifdef CONFIG_IA64_VMAP_KERNEL
+#define __tpa(x) ({ia64_va _v; asm("tpa %0=%1" : "=r"(_v.l) : "r"(x)); _v.l;})
+#define __imva(x) ((long)__va(__tpa(x)))
+#else
+#define __tpa(x) __pa(x)
+#define __imva(x) (x)
+#endif
#define REGION_NUMBER(x) ({ia64_va _v; _v.l = (long) (x); _v.f.reg;})
#define REGION_OFFSET(x) ({ia64_va _v; _v.l = (long) (x); _v.f.off;})
diff -ru ../../REF/linux-2.5.39-ia64-020928/include/asm-ia64/pgtable.h aegl/include/asm-ia64/pgtable.h
--- ../../REF/linux-2.5.39-ia64-020928/include/asm-ia64/pgtable.h Fri Sep 27 14:49:40 2002
+++ aegl/include/asm-ia64/pgtable.h Wed Oct 9 13:53:42 2002
@@ -415,7 +415,12 @@
* for zero-mapped memory areas etc..
*/
extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
+#ifdef CONFIG_IA64_VMAP_KERNEL
+extern struct page *zero_page_memmap_ptr;
+#define ZERO_PAGE(vaddr) (zero_page_memmap_ptr)
+#else
#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+#endif
/* We provide our own get_unmapped_area to cope with VA holes for userland */
#define HAVE_ARCH_UNMAPPED_AREA
@@ -440,7 +445,9 @@
*/
#define KERNEL_TR_PAGE_SHIFT _PAGE_SIZE_64M
#define KERNEL_TR_PAGE_SIZE (1 << KERNEL_TR_PAGE_SHIFT)
+#ifndef CONFIG_IA64_VMAP_KERNEL
#define KERNEL_TR_PAGE_NUM ((KERNEL_START - PAGE_OFFSET) / KERNEL_TR_PAGE_SIZE)
+#endif
/*
* No page table caches to initialise
diff -ru ../../REF/linux-2.5.39-ia64-020928/include/asm-ia64/system.h aegl/include/asm-ia64/system.h
--- ../../REF/linux-2.5.39-ia64-020928/include/asm-ia64/system.h Fri Sep 27 14:49:49 2002
+++ aegl/include/asm-ia64/system.h Mon Oct 7 10:59:03 2002
@@ -18,7 +18,11 @@
#include <asm/page.h>
#include <asm/pal.h>
+#ifdef CONFIG_IA64_VMAP_KERNEL
+#define KERNEL_START (0xffffffff00000000)
+#else
#define KERNEL_START (PAGE_OFFSET + 68*1024*1024)
+#endif
#define GATE_ADDR (0xa000000000000000 + PAGE_SIZE)
#define PERCPU_ADDR (0xa000000000000000 + 2*PAGE_SIZE)
^ permalink raw reply [flat|nested] 60+ messages in thread* [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
2003-04-17 23:57 ` Jesse Barnes
2003-04-25 21:02 ` Jesse Barnes
@ 2003-05-07 22:39 ` David Mosberger
2003-05-07 23:24 ` Luck, Tony
` (55 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: David Mosberger @ 2003-05-07 22:39 UTC (permalink / raw)
To: linux-ia64
Hi Jesse,
I thought I had responded to this already, but my mailer claims otherwise.
>>>>> On Fri, 25 Apr 2003 14:02:20 -0700, Jesse Barnes <jbarnes@sgi.com> said:
Jesse> On Thu, Apr 17, 2003 at 04:05:29PM -0700, David Mosberger wrote:
>> I don't have an issue with the particular patch, but I don't like to
>> take new features incrementally. At the moment, the normal ia64
>> kernel doesn't support loading at arbitrary physical addresses. If
>> there is a complete patch to add such a feature, we should consider it
>> for inclusion.
Jesse> Chris Wedgwood reminded me that Tony posted one awhile back. I wonder
Jesse> what you think of his approach? I've attached it for reference since
Jesse> the linux-ia64 archive only had a mangled copy. It's against 2.5.39.
Jesse> If you have any other ideas about how we can address this, I'd like to
Jesse> hear them. Ideally, a generic kernel would work on all ia64 platforms
Jesse> at the same virtual load address (which is one problem with Tony's old
Jesse> patch IMO), regardless of physical load address.
Keeping the load address the same would definitely make life easier.
Otherwise, any tool that depends on System.map may break.
To be honest, conceptually, I prefer boot-time relocation, because it
keeps the kernel model simpler (and there is already relocation code
in the in-kernel module loader which you could leverage). But having
the kernel text addresses vary from one machine to another, depending
on memory configuration is not exactly a pleasant thought. I don't
really have much of a stake in this, so I'd like the folks who really
care to work out a patch that works on all ia64 NUMA platforms and
then convince me why it's a good idea to include that patch and why
it's really The Right Thing.
--david
^ permalink raw reply [flat|nested] 60+ messages in thread* RE: [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (2 preceding siblings ...)
2003-05-07 22:39 ` David Mosberger
@ 2003-05-07 23:24 ` Luck, Tony
2003-05-07 23:51 ` David Mosberger
` (54 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: Luck, Tony @ 2003-05-07 23:24 UTC (permalink / raw)
To: linux-ia64
> Keeping the load address the same would definitely make life easier.
> Otherwise, any tool that depends on System.map may break.
>
> To be honest, conceptually, I prefer boot-time relocation, because it
> keeps the kernel model simpler (and there is already relocation code
> in the in-kernel module loader which you could leverage). But having
> the kernel text addresses vary from one machine to another, depending
> on memory configuration is not exactly a pleasant thought. I don't
> really have much of a stake in this, so I'd like the folks who really
> care to work out a patch that works on all ia64 NUMA platforms and
> then convince me why it's a good idea to include that patch and why
> it's really The Right Thing.
In case anyone missed the point of this, the issue is handling machines
that don't have physical memory at the location that Linux expects to
be loaded at (some ccNUMA machines that configure memory based on node
numbers cannot guarantee until boot time where any of the memory is physically
located ... depending on which nodes exist).
Here are the approaches that have been proposed and/or tried so far:
1) My patch (posted around October last year) which picked virtual addresses
in the wild blue yonder (initial versions used 0xe002000000000000, later ones
used 0xffffffff00000000) for the link address for the kernel. Elilo can load
kernel at any suitably aligned physical address, and head.S establishes the
mappings using itr[0] and dtr[0].
pros) provided separate maps for kernel text and data, so supported kernel text
replication too.
cons) __pa() no longer works on kernel addresses, use new __tpa() instead.
Some ugly runtime patching of kernel code to get physical address of
swapper_pg_dir into the TLB miss code.
2) I think SGI are currently running a modified version of #1 without the text
replication support, and that provides a mapping from the normal virtual
address that the kernel is linked for (0xe000000004400000) to whatever physical
address it was actually loaded at ... at least I think that's what Jack said.
pros) simpler than my patch
cons) Still needs __tpa() instead of __pa() for kernel addresses.
3) David's suggestion of boot-time relocation. Probably simplest to implement
this in elilo, but if you are really good at PIC asm code it could be done in
the kernel startup sequence.
pros) Just like linking kernel at a new address.
Avoids the __tpa() issue.
Doesn't invalidate any assumptions about how to get from virtual to
physical addresses and back again.
cons) Nobody has implemented it.
-Tony
^ permalink raw reply [flat|nested] 60+ messages in thread* RE: [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (3 preceding siblings ...)
2003-05-07 23:24 ` Luck, Tony
@ 2003-05-07 23:51 ` David Mosberger
2003-05-08 0:00 ` Jesse Barnes
` (53 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: David Mosberger @ 2003-05-07 23:51 UTC (permalink / raw)
To: linux-ia64
Hi Tony,
Great summary!
Tony> 3) David's suggestion of boot-time relocation. Probably
Tony> simplest to implement this in elilo, but if you are really
Tony> good at PIC asm code it could be done in the kernel startup
Tony> sequence.
Agreed. Anyone who's ever debugged the ld.so startup code will agree
that it's much better to do the relocation in elilo.
Tony> pros) Just like linking kernel at a new address. Avoids the
Tony> __tpa() issue. Doesn't invalidate any assumptions about how
Tony> to get from virtual to physical addresses and back again.
Tony> cons) Nobody has implemented it.
I think:
- kernel text addresses vary depending on physical memory conf.
is potentially a bigger issue. Implementing the relocator in elilo
should be really easy (just take the kernel loader and massage it a
bit).
How many tools are out there that depend on System.map or similar? I
suppose you could just generate a fresh System.map at boot time (or
have something like /proc/System.map).
--david
^ permalink raw reply [flat|nested] 60+ messages in thread* Re: [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (4 preceding siblings ...)
2003-05-07 23:51 ` David Mosberger
@ 2003-05-08 0:00 ` Jesse Barnes
2003-05-08 0:04 ` Jesse Barnes
` (52 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: Jesse Barnes @ 2003-05-08 0:00 UTC (permalink / raw)
To: linux-ia64
On Wed, May 07, 2003 at 04:51:21PM -0700, David Mosberger wrote:
> Hi Tony,
>
> Great summary!
Yeah, thanks. It's a good outline of our options.
> Tony> 3) David's suggestion of boot-time relocation. Probably
> Tony> simplest to implement this in elilo, but if you are really
> Tony> good at PIC asm code it could be done in the kernel startup
> Tony> sequence.
>
> Agreed. Anyone who's ever debugged the ld.so startup code will agree
> that it's much better to do the relocation in elilo.
>
> Tony> pros) Just like linking kernel at a new address. Avoids the
> Tony> __tpa() issue. Doesn't invalidate any assumptions about how
> Tony> to get from virtual to physical addresses and back again.
>
> Tony> cons) Nobody has implemented it.
>
> I think:
>
> - kernel text addresses vary depending on physical memory conf.
>
> is potentially a bigger issue. Implementing the relocator in elilo
> should be really easy (just take the kernel loader and massage it a
> bit).
>
> How many tools are out there that depend on System.map or similar? I
> suppose you could just generate a fresh System.map at boot time (or
> have something like /proc/System.map).
Ahh, this might be a good way to go. We'd basically be relinking the
kernel at load time with a new KERNEL_START, right? I haven't looked
at the new module loader code yet (in fact, I've never done any linker
stuff), but I will now.
Thanks,
Jesse
^ permalink raw reply [flat|nested] 60+ messages in thread* Re: [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (5 preceding siblings ...)
2003-05-08 0:00 ` Jesse Barnes
@ 2003-05-08 0:04 ` Jesse Barnes
2003-05-08 0:07 ` Luck, Tony
` (51 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: Jesse Barnes @ 2003-05-08 0:04 UTC (permalink / raw)
To: linux-ia64
On Wed, May 07, 2003 at 04:51:21PM -0700, David Mosberger wrote:
> How many tools are out there that depend on System.map or similar? I
> suppose you could just generate a fresh System.map at boot time (or
> have something like /proc/System.map).
Forgot to mention that there are tools that depend on this (e.g.
VTune, but it only cares about the load addr and size), but
/proc/ksyms may be enough already if the right symbols are exported.
Is it still there in 2.5?
Thanks,
Jesse
^ permalink raw reply [flat|nested] 60+ messages in thread* RE: [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (6 preceding siblings ...)
2003-05-08 0:04 ` Jesse Barnes
@ 2003-05-08 0:07 ` Luck, Tony
2003-05-08 0:13 ` Keith Owens
` (50 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: Luck, Tony @ 2003-05-08 0:07 UTC (permalink / raw)
To: linux-ia64
> How many tools are out there that depend on System.map or similar? I
> suppose you could just generate a fresh System.map at boot time (or
> have something like /proc/System.map).
Apparently a few, but not a vast number ... on RedHat AS2.1:
# grep -rl 'System.map' /bin /usr/bin /usr/sbin /etc /sbin
/bin/ps
/usr/bin/ksymoops
/usr/bin/kpm
/usr/sbin/readprofile
/etc/rc.d/rc.sysinit
/etc/rc.sysinit
/sbin/depmod
/sbin/insmod.static
/sbin/insmod
/sbin/modprobe
/sbin/ksyms
/sbin/kallsyms
/sbin/lsmod
/sbin/modinfo
/sbin/rmmod
/sbin/klogd
/sbin/installkernel
/sbin/new-kernel-pkg
^ permalink raw reply [flat|nested] 60+ messages in thread* Re: [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (7 preceding siblings ...)
2003-05-08 0:07 ` Luck, Tony
@ 2003-05-08 0:13 ` Keith Owens
2003-05-08 0:21 ` David Mosberger
` (49 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: Keith Owens @ 2003-05-08 0:13 UTC (permalink / raw)
To: linux-ia64
On Wed, 7 May 2003 16:51:21 -0700,
David Mosberger <davidm@napali.hpl.hp.com> wrote:
>How many tools are out there that depend on System.map or similar? I
>suppose you could just generate a fresh System.map at boot time (or
>have something like /proc/System.map).
ksymoops (user space), kdb, kksymoops (kernel).
Both kdb and kksymoops are specifically designed to not use
relocations, they store kernel addresses as absolute numbers. Not that
big a deal, ia64 setup can run the kallsyms tables at boot time and
relocate them.
ksymoops in user space is a bigger problem, it reads System.map.
Fixing user space will be a problem. OTOH, the function descriptor
problem in ia64 means that ksymoops in user space gets thousands of
address mismatches between System.map and /proc/ksyms so ksymoops is
not used very much for ia64.
^ permalink raw reply [flat|nested] 60+ messages in thread* Re: [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (8 preceding siblings ...)
2003-05-08 0:13 ` Keith Owens
@ 2003-05-08 0:21 ` David Mosberger
2003-05-08 0:23 ` David Mosberger
` (48 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: David Mosberger @ 2003-05-08 0:21 UTC (permalink / raw)
To: linux-ia64
>>>>> On Wed, 7 May 2003 17:00:45 -0700, Jesse Barnes <jbarnes@sgi.com> said:
Jesse> Ahh, this might be a good way to go. We'd basically be
Jesse> relinking the kernel at load time with a new KERNEL_START,
Jesse> right?
Yup.
--david
^ permalink raw reply [flat|nested] 60+ messages in thread* Re: [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (9 preceding siblings ...)
2003-05-08 0:21 ` David Mosberger
@ 2003-05-08 0:23 ` David Mosberger
2003-05-08 0:24 ` Keith Owens
` (47 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: David Mosberger @ 2003-05-08 0:23 UTC (permalink / raw)
To: linux-ia64
>>>>> On Wed, 7 May 2003 17:21:18 -0700, David Mosberger <davidm@linux.hpl.hp.com> said:
>>>>> On Wed, 7 May 2003 17:00:45 -0700, Jesse Barnes <jbarnes@sgi.com> said:
Jesse> Ahh, this might be a good way to go. We'd basically be
Jesse> relinking the kernel at load time with a new KERNEL_START,
Jesse> right?
David> Yup.
On second thought, "relinking" might be confusing. "Relocating" is
more accurate (we don't rearrange anything within the kernel, just
moving the whole thing around, which is a lot easier).
--david
^ permalink raw reply [flat|nested] 60+ messages in thread* Re: [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (10 preceding siblings ...)
2003-05-08 0:23 ` David Mosberger
@ 2003-05-08 0:24 ` Keith Owens
2003-05-08 0:54 ` David Mosberger
` (46 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: Keith Owens @ 2003-05-08 0:24 UTC (permalink / raw)
To: linux-ia64
On Wed, 7 May 2003 17:07:42 -0700,
"Luck, Tony" <tony.luck@intel.com> wrote:
>David wrote
>> How many tools are out there that depend on System.map or similar? I
>> suppose you could just generate a fresh System.map at boot time (or
>> have something like /proc/System.map).
>
>Apparently a few, but not a vast number ... on RedHat AS2.1:
>
> # grep -rl 'System.map' /bin /usr/bin /usr/sbin /etc /sbin
> /bin/ps
> /usr/bin/ksymoops
> /usr/bin/kpm
> /usr/sbin/readprofile
> /etc/rc.d/rc.sysinit
> /etc/rc.sysinit
> /sbin/depmod
> /sbin/insmod.static
> /sbin/insmod
> /sbin/modprobe
> /sbin/ksyms
> /sbin/kallsyms
> /sbin/lsmod
> /sbin/modinfo
> /sbin/rmmod
> /sbin/klogd
> /sbin/installkernel
> /sbin/new-kernel-pkg
Out of the modutils programs, only depmod actually reads System.map and
only when doing make modules_install. It does not care about the
addresses, only if a symbol is exported or not. insmod, modprobe,
ksyms, lsmod, modinfo, rmmod contain a reference to System.map because
of a table that lists files that might appear in /lib/modules/`uname -r`,
they only care about the file name, not its contents.
kallsyms will require run time relocation, its table uses absolute
kernel addresses.
rc.sysinit only looks for the file System.map so it can set up a
symlink, it does not read the contents.
installkernel and new-kernel-pkg just copy System.map so other tools
can find it.
klogd, ps, readprofile and lkcd (not on that list) will need a clean
copy of System.map. IOW it has to be regenerated for those programs.
Don't know about kpm.
^ permalink raw reply [flat|nested] 60+ messages in thread* Re: [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (11 preceding siblings ...)
2003-05-08 0:24 ` Keith Owens
@ 2003-05-08 0:54 ` David Mosberger
2003-05-08 1:07 ` David Mosberger
` (45 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: David Mosberger @ 2003-05-08 0:54 UTC (permalink / raw)
To: linux-ia64
>>>>> On Wed, 7 May 2003 17:04:09 -0700, Jesse Barnes <jbarnes@sgi.com> said:
Jesse> Forgot to mention that there are tools that depend on this
Jesse> (e.g. VTune, but it only cares about the load addr and
Jesse> size), but /proc/ksyms may be enough already if the right
Jesse> symbols are exported. Is it still there in 2.5?
Nope, it's gone. Check with Rusty on what the story is here. I can't
believe there is no way for user-space to get at the kallsym info in
2.5, but if there is, I sure haven't found it yet.
--david
^ permalink raw reply [flat|nested] 60+ messages in thread* Re: [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (12 preceding siblings ...)
2003-05-08 0:54 ` David Mosberger
@ 2003-05-08 1:07 ` David Mosberger
2003-05-08 1:46 ` Jesse Barnes
` (44 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: David Mosberger @ 2003-05-08 1:07 UTC (permalink / raw)
To: linux-ia64
>>>>> On Thu, 08 May 2003 10:13:06 +1000, Keith Owens <kaos@ocs.com.au> said:
Keith> On Wed, 7 May 2003 16:51:21 -0700, David Mosberger
Keith> <davidm@napali.hpl.hp.com> wrote:
>> How many tools are out there that depend on System.map or
>> similar? I suppose you could just generate a fresh System.map at
>> boot time (or have something like /proc/System.map).
Keith> ksymoops (user space), kdb, kksymoops (kernel).
What about crash-dump tools such as lkcd? Do they depend on absolute
addresses?
--david
^ permalink raw reply [flat|nested] 60+ messages in thread* Re: [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (13 preceding siblings ...)
2003-05-08 1:07 ` David Mosberger
@ 2003-05-08 1:46 ` Jesse Barnes
2003-05-08 1:55 ` Keith Owens
` (43 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: Jesse Barnes @ 2003-05-08 1:46 UTC (permalink / raw)
To: linux-ia64
On Wed, May 07, 2003 at 06:07:58PM -0700, David Mosberger wrote:
> Keith> ksymoops (user space), kdb, kksymoops (kernel).
>
> What about crash-dump tools such as lkcd? Do they depend on absolute
> addresses?
We've added an ioctl to the lkcd device so the user level tools can
get the actual load address of the kernel, but I'm not sure if that's
part of the community version of lkcd yet.
Jesse
^ permalink raw reply [flat|nested] 60+ messages in thread* Re: [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (14 preceding siblings ...)
2003-05-08 1:46 ` Jesse Barnes
@ 2003-05-08 1:55 ` Keith Owens
2003-05-08 2:16 ` Keith Owens
` (42 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: Keith Owens @ 2003-05-08 1:55 UTC (permalink / raw)
To: linux-ia64
On Wed, 7 May 2003 18:46:46 -0700,
Jesse Barnes <jbarnes@sgi.com> wrote:
>On Wed, May 07, 2003 at 06:07:58PM -0700, David Mosberger wrote:
>> Keith> ksymoops (user space), kdb, kksymoops (kernel).
>>
>> What about crash-dump tools such as lkcd? Do they depend on absolute
>> addresses?
>
>We've added an ioctl to the lkcd device so the user level tools can
>get the actual load address of the kernel, but I'm not sure if that's
>part of the community version of lkcd yet.
I sent the patch to the lkcd list yesterday, no response yet.
^ permalink raw reply [flat|nested] 60+ messages in thread* Re: [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (15 preceding siblings ...)
2003-05-08 1:55 ` Keith Owens
@ 2003-05-08 2:16 ` Keith Owens
2003-05-08 4:59 ` David Mosberger
` (41 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: Keith Owens @ 2003-05-08 2:16 UTC (permalink / raw)
To: linux-ia64
On Wed, 7 May 2003 17:23:31 -0700,
David Mosberger <davidm@napali.hpl.hp.com> wrote:
>>>>>> On Wed, 7 May 2003 17:21:18 -0700, David Mosberger <davidm@linux.hpl.hp.com> said:
>
>>>>>> On Wed, 7 May 2003 17:00:45 -0700, Jesse Barnes <jbarnes@sgi.com> said:
> Jesse> Ahh, this might be a good way to go. We'd basically be
> Jesse> relinking the kernel at load time with a new KERNEL_START,
> Jesse> right?
>
> David> Yup.
>
>On second thought, "relinking" might be confusing. "Relocating" is
>more accurate (we don't rearrange anything within the kernel, just
>moving the whole thing around, which is a lot easier).
Using what data? vmlinux does not contain any relocation data,
everything is converted to absolute addresses in the final link
stage. If you only use standard ld output then elilo will have to
handle all the relocations, messy.
One possibility is to link vmlinux to a temporary file, using -r to
preserve the relocation data, followed by a link to the real vmlinux,
without -r. From the temporary file, extract the information that is
required to perform boot time relocation and append that data to vmlinux,
at _end. The kernel startup code (PIC assembler) runs the additional
table, adjusts the relocations then discards the table.
A quick check of a 2.4 ia64 kernel shows only these relocation types:
DIR32LSB
DIR64LSB
FPTR64LSB
GPREL22
IMM64
LTOFF22
LTOFF_FPTR22
PCREL21B
PCREL60B
SEGREL64LSB
GPREL22, LTOFF22, LTOFF_FPTR22, PCREL21B, PCREL60B, SEGREL64LSB are not
a problem, they are already PIC. DIR32LSB, DIR64LSB, FPTR64LSB are
easy to adjust. IMM64 is the only messy bit of code, lots of shifts.
Even IMM64 is not that hard to do in PIC asm.
This approach keeps all the kernel relocation code inside the ia64
tree, it does not rely on keeping elilo in sync with future kernel
and/or gcc changes. By extracting and formatting the relocation data
at build time, it simplifies and speeds up the load process.
^ permalink raw reply [flat|nested] 60+ messages in thread* Re: [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (16 preceding siblings ...)
2003-05-08 2:16 ` Keith Owens
@ 2003-05-08 4:59 ` David Mosberger
2003-05-08 16:07 ` Jesse Barnes
` (40 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: David Mosberger @ 2003-05-08 4:59 UTC (permalink / raw)
To: linux-ia64
>>>>> On Thu, 08 May 2003 12:16:29 +1000, Keith Owens <kaos@ocs.com.au> said:
>> On second thought, "relinking" might be confusing. "Relocating"
>> is more accurate (we don't rearrange anything within the kernel,
>> just moving the whole thing around, which is a lot easier).
Keith> Using what data? vmlinux does not contain any relocation
Keith> data, everything is converted to absolute addresses in the
Keith> final link stage.
Oh, I would recommend to build the kernel as a shared object. That's
what we do for EFI apps under GNU EFI and it works well. (There are
some ugly corners in GNU EFI, but they have to do with converting the
ELF shared object into PE+ format.)
Keith> One possibility is to link vmlinux to a temporary file, using
Keith> -r to preserve the relocation data, followed by a link to the
Keith> real vmlinux, without -r. From the temporary file, extract
Keith> the information that is required to perform boot time
Keith> relocation and append that data to vmlinux, at _end. The
Keith> kernel startup code (PIC assembler) runs the additional
Keith> table, adjusts the relocations then discards the table.
Sounds rather fragile to me. Relocating shared objects is quite easy
actually. GNU EFI does it in 102 lines of ia64 assembly code. That's
probably pretty hard to beat.
Keith> A quick check of a 2.4 ia64 kernel shows only these
Keith> relocation types:
Keith> DIR32LSB DIR64LSB FPTR64LSB GPREL22 IMM64 LTOFF22
Keith> LTOFF_FPTR22 PCREL21B PCREL60B SEGREL64LSB
Keith> GPREL22, LTOFF22, LTOFF_FPTR22, PCREL21B, PCREL60B,
Keith> SEGREL64LSB are not a problem, they are already PIC.
Keith> DIR32LSB, DIR64LSB, FPTR64LSB are easy to adjust. IMM64 is
Keith> the only messy bit of code, lots of shifts. Even IMM64 is
Keith> not that hard to do in PIC asm.
Remember, we already have the in-kernel module loader, which has to
deal with "ld -r" modules. I'd have preferred if those had been
shared objects, too, but apparently the toolchains on some other
platforms are sufficiently broken that this wasn't a feasible option
(not for 2.6, anyhow).
--david
^ permalink raw reply [flat|nested] 60+ messages in thread* Re: [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (17 preceding siblings ...)
2003-05-08 4:59 ` David Mosberger
@ 2003-05-08 16:07 ` Jesse Barnes
2003-05-08 17:07 ` David Mosberger
` (39 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: Jesse Barnes @ 2003-05-08 16:07 UTC (permalink / raw)
To: linux-ia64
On Wed, May 07, 2003 at 09:59:41PM -0700, David Mosberger wrote:
> Keith> GPREL22, LTOFF22, LTOFF_FPTR22, PCREL21B, PCREL60B,
> Keith> SEGREL64LSB are not a problem, they are already PIC.
> Keith> DIR32LSB, DIR64LSB, FPTR64LSB are easy to adjust. IMM64 is
> Keith> the only messy bit of code, lots of shifts. Even IMM64 is
> Keith> not that hard to do in PIC asm.
>
> Remember, we already have the in-kernel module loader, which has to
> deal with "ld -r" modules. I'd have preferred if those had been
> shared objects, too, but apparently the toolchains on some other
> platforms are sufficiently broken that this wasn't a feasible option
> (not for 2.6, anyhow).
So simply linking with 'ld -q' wouldn't be sufficient? Doesn't that
produce a binary that will boot w/o elilo changes, but can also be
relocated?
Thanks,
Jesse
^ permalink raw reply [flat|nested] 60+ messages in thread* Re: [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (18 preceding siblings ...)
2003-05-08 16:07 ` Jesse Barnes
@ 2003-05-08 17:07 ` David Mosberger
2003-05-08 17:20 ` Jesse Barnes
` (38 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: David Mosberger @ 2003-05-08 17:07 UTC (permalink / raw)
To: linux-ia64
>>>>> On Thu, 8 May 2003 09:07:14 -0700, Jesse Barnes <jbarnes@sgi.com> said:
Jesse> So simply linking with 'ld -q' wouldn't be sufficient? Doesn't that
Jesse> produce a binary that will boot w/o elilo changes, but can also be
Jesse> relocated?
Ah, yes, I had forgotten about -q. It probably would be sufficient.
And the backwards-compatibility it would achieve would definitely be
a plus.
A new concern though: I don't think text-replication will work well
with this scheme. If each replica is linked for a different address,
function pointers become a performance problem: whenever you call
through a function pointer, you'll end up executing on whatever
replica initialized the function pointer. Not good.
Unless I'm missing something, kernel relocation is therefore a bit of
a stillborn idea (unless text-replication really isn't all that
important because we all have humongous caches between nodes...).
If we do have to go the virtual remapping route, my preference would
be to stick the kernel somewhere in region 5 (0xa..). Has anyone
tried that? It should work fine in principle (modules already live in
that space).
--david
^ permalink raw reply [flat|nested] 60+ messages in thread* Re: [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (19 preceding siblings ...)
2003-05-08 17:07 ` David Mosberger
@ 2003-05-08 17:20 ` Jesse Barnes
2003-05-08 17:50 ` David Mosberger
` (37 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: Jesse Barnes @ 2003-05-08 17:20 UTC (permalink / raw)
To: linux-ia64
On Thu, May 08, 2003 at 10:07:49AM -0700, David Mosberger wrote:
> Ah, yes, I had forgotten about -q. It probably would be sufficient.
> And the backwards-compatibility it would achieve would definitely be
> a plus.
I thought so too.
> A new concern though: I don't think text-replication will work well
> with this scheme. If each replica is linked for a different address,
> function pointers become a performance problem: whenever you call
> through a function pointer, you'll end up executing on whatever
> replica initialized the function pointer. Not good.
Right, I thought about that. I don't think we need text replication
yet though anyway (at least our platform doesn't).
> Unless I'm missing something, kernel relocation is therefore a bit of
> a stillborn idea (unless text-replication really isn't all that
> important because we all have humongous caches between nodes...).
>
> If we do have to go the virtual remapping route, my preference would
> be to stick the kernel somewhere in region 5 (0xa..). Has anyone
> tried that? It should work fine in principle (modules already live in
> that space).
I guess we can revisit that if we need/want to implement text
replication at some point in the future. It might not be that bad
though--I don't envision relocating each replica, but rather just
separating the kernel text and data with the right translation
registers that point to local text and global data, but I haven't
thought about it much (this is what Tony's original patch did, and it
seemed to work well).
Jesse
^ permalink raw reply [flat|nested] 60+ messages in thread* Re: [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (20 preceding siblings ...)
2003-05-08 17:20 ` Jesse Barnes
@ 2003-05-08 17:50 ` David Mosberger
2003-05-08 17:54 ` Luck, Tony
` (36 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: David Mosberger @ 2003-05-08 17:50 UTC (permalink / raw)
To: linux-ia64
>>>>> On Thu, 8 May 2003 10:20:05 -0700, Jesse Barnes <jbarnes@sgi.com> said:
>> A new concern though: I don't think text-replication will work
>> well with this scheme. If each replica is linked for a different
>> address, function pointers become a performance problem: whenever
>> you call through a function pointer, you'll end up executing on
>> whatever replica initialized the function pointer. Not good.
Jesse> Right, I thought about that. I don't think we need text
Jesse> replication yet though anyway (at least our platform
Jesse> doesn't).
That's what I would have guessed. I think it might be different for
the 8870 chipset though. Or does it have a huge L4 cache, too?
>> If we do have to go the virtual remapping route, my preference
>> would be to stick the kernel somewhere in region 5 (0xa..). Has
>> anyone tried that? It should work fine in principle (modules
>> already live in that space).
Jesse> I guess we can revisit that if we need/want to implement text
Jesse> replication at some point in the future. It might not be
Jesse> that bad though--I don't envision relocating each replica,
Jesse> but rather just separating the kernel text and data with the
Jesse> right translation registers that point to local text and
Jesse> global data, but I haven't thought about it much (this is
Jesse> what Tony's original patch did, and it seemed to work well).
But it would be rather painful to switch to the "kernel may live at
any address" model just to discover that a year later we're going to
map it virtually anyhow (in which case, the kernel could live at a
fixed address again).
--david
^ permalink raw reply [flat|nested] 60+ messages in thread* RE: [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (21 preceding siblings ...)
2003-05-08 17:50 ` David Mosberger
@ 2003-05-08 17:54 ` Luck, Tony
2003-05-08 20:29 ` David Mosberger
` (35 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: Luck, Tony @ 2003-05-08 17:54 UTC (permalink / raw)
To: linux-ia64
> If we do have to go the virtual remapping route, my preference would
> be to stick the kernel somewhere in region 5 (0xa..). Has anyone
> tried that? It should work fine in principle (modules already live in
> that space).
I didn't try region 5, but my patch didn't glitch when I moved from
text at 0xe002000000000000 data at 0xe002100000000000 to text at
0xfffffffe00000000 and data at 0xffffffff00000000 (though apparently
the kdb patch didn't play well with a kernel linked at these addresses),
so there is a good chance that a move to region 5 wouldn't take much
effort. In fact it might clear up the long standing issue with
/proc/kcore, if we put the kernel at the low end of region 5 all the
assumptions that module addresses are higher than kernel addresses
will be valid for ia64 too :-)
How does this look for an address map?
0xA000000000000000 kernel text
0xA000000100000000 kernel data
0xA000000200000000 percpu area
0xA000000300000000 vmalloc & kernel modules
0xBFFFFFFxxxxxxxxx CONFIG_VIRTUAL_MEM_MAP
Reserving an almost arbitrary 4GB for each of kernel text/data/percpu (I
picked 4GB, since it is the biggest page size supported by ia64 ... but
other larger boundaries might make sense to make the upper level page
tables for the vmalloc area start on a natural boundary).
-Tony
^ permalink raw reply [flat|nested] 60+ messages in thread* RE: [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (22 preceding siblings ...)
2003-05-08 17:54 ` Luck, Tony
@ 2003-05-08 20:29 ` David Mosberger
2003-05-08 22:17 ` Keith Owens
` (34 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: David Mosberger @ 2003-05-08 20:29 UTC (permalink / raw)
To: linux-ia64
>>>>> On Thu, 8 May 2003 10:54:05 -0700, "Luck, Tony" <tony.luck@intel.com> said:
Tony> In fact it might clear up the long standing issue with
Tony> /proc/kcore, if we put the kernel at the low end of region 5
Tony> all the assumptions that module addresses are higher than
Tony> kernel addresses will be valid for ia64 too :-)
Yes, indeed.
Tony> How does this look for an address map?
Tony> 0xA000000000000000 kernel text
Tony> 0xA000000100000000 kernel data
Tony> 0xA000000200000000 percpu area
Tony> 0xA000000300000000 vmalloc & kernel modules
Tony> 0xBFFFFFFxxxxxxxxx CONFIG_VIRTUAL_MEM_MAP
Tony> Reserving an almost arbitrary 4GB for each of kernel
Tony> text/data/percpu (I picked 4GB, since it is the biggest page
Tony> size supported by ia64 ... but other larger boundaries might
Tony> make sense to make the upper level page tables for the vmalloc
Tony> area start on a natural boundary).
I think we may want to reserve the first 4GB for miscellaneous stuff,
such as the start-of-kernel-space-hole, the gate page (and probably
soon a read-only page), and the percpu area. In particular the
guaranteed-not-to-be-mapped page needs to be right at the end of user
space.
--david
^ permalink raw reply [flat|nested] 60+ messages in thread* Re: [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (23 preceding siblings ...)
2003-05-08 20:29 ` David Mosberger
@ 2003-05-08 22:17 ` Keith Owens
2003-05-08 22:27 ` Luck, Tony
` (33 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: Keith Owens @ 2003-05-08 22:17 UTC (permalink / raw)
To: linux-ia64
On Thu, 8 May 2003 10:07:49 -0700,
David Mosberger <davidm@napali.hpl.hp.com> wrote:
>If we do have to go the virtual remapping route, my preference would
>be to stick the kernel somewhere in region 5 (0xa..). Has anyone
>tried that? It should work fine in principle (modules already live in
>that space).
That loses the alt[id]tlb handling for the kernel code and data. Is
that going to be a performance problem?
^ permalink raw reply [flat|nested] 60+ messages in thread* RE: [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (24 preceding siblings ...)
2003-05-08 22:17 ` Keith Owens
@ 2003-05-08 22:27 ` Luck, Tony
2003-05-08 22:31 ` Jesse Barnes
` (32 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: Luck, Tony @ 2003-05-08 22:27 UTC (permalink / raw)
To: linux-ia64
> On Thu, 8 May 2003 10:07:49 -0700,
> David Mosberger <davidm@napali.hpl.hp.com> wrote:
> >If we do have to go the virtual remapping route, my preference would
> >be to stick the kernel somewhere in region 5 (0xa..). Has anyone
> >tried that? It should work fine in principle (modules
> already live in
> >that space).
>
> That loses the alt[id]tlb handling for the kernel code and data. Is
> that going to be a performance problem?
I assume that we are still planning to lock the translations for
the kernel into ITR[0] and DTR[0] ... we can still do this with
the kernel mapped in region 5.
-Tony
^ permalink raw reply [flat|nested] 60+ messages in thread* Re: [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (25 preceding siblings ...)
2003-05-08 22:27 ` Luck, Tony
@ 2003-05-08 22:31 ` Jesse Barnes
2003-05-08 22:53 ` David Mosberger
` (31 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: Jesse Barnes @ 2003-05-08 22:31 UTC (permalink / raw)
To: linux-ia64
On Thu, May 08, 2003 at 10:50:22AM -0700, David Mosberger wrote:
> Jesse> Right, I thought about that. I don't think we need text
> Jesse> replication yet though anyway (at least our platform
> Jesse> doesn't).
>
> That's what I would have guessed. I think it might be different for
> the 8870 chipset though. Or does it have a huge L4 cache, too?
If they don't need it, would you be happy taking the simple reloc code
in elilo? And it seems simple enough, so even if we have to throw it
away, no big loss.
Thanks,
Jesse
^ permalink raw reply [flat|nested] 60+ messages in thread* Re: [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (26 preceding siblings ...)
2003-05-08 22:31 ` Jesse Barnes
@ 2003-05-08 22:53 ` David Mosberger
2003-05-08 23:32 ` David Mosberger
` (30 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: David Mosberger @ 2003-05-08 22:53 UTC (permalink / raw)
To: linux-ia64
>>>>> On Fri, 09 May 2003 08:17:35 +1000, Keith Owens <kaos@ocs.com.au> said:
Keith> On Thu, 8 May 2003 10:07:49 -0700,
Keith> David Mosberger <davidm@napali.hpl.hp.com> wrote:
>> If we do have to go the virtual remapping route, my preference would
>> be to stick the kernel somewhere in region 5 (0xa..). Has anyone
>> tried that? It should work fine in principle (modules already live in
>> that space).
Keith> That loses the alt[id]tlb handling for the kernel code and data. Is
Keith> that going to be a performance problem?
Should be OK: I'm assuming you'd use a single (large) pinned TLB entry
to map the entire kernel. So no, if anything, performance improves
for the kernel itself.
--david
^ permalink raw reply [flat|nested] 60+ messages in thread* Re: [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (27 preceding siblings ...)
2003-05-08 22:53 ` David Mosberger
@ 2003-05-08 23:32 ` David Mosberger
2003-05-09 0:01 ` Jesse Barnes
` (29 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: David Mosberger @ 2003-05-08 23:32 UTC (permalink / raw)
To: linux-ia64
>>>>> On Thu, 8 May 2003 15:31:14 -0700, Jesse Barnes <jbarnes@sgi.com> said:
Jesse> On Thu, May 08, 2003 at 10:50:22AM -0700, David Mosberger wrote:
Jesse> Right, I thought about that. I don't think we need text
Jesse> replication yet though anyway (at least our platform
Jesse> doesn't).
>> That's what I would have guessed. I think it might be different for
>> the 8870 chipset though. Or does it have a huge L4 cache, too?
Jesse> If they don't need it, would you be happy taking the simple reloc code
Jesse> in elilo? And it seems simple enough, so even if we have to throw it
Jesse> away, no big loss.
I'm confused. Last time we talked about this, you said something
along the lines that you couldn't find the reloc code. In any case,
for elilo changes, you have to talk to Stephane.
--david
^ permalink raw reply [flat|nested] 60+ messages in thread* Re: [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (28 preceding siblings ...)
2003-05-08 23:32 ` David Mosberger
@ 2003-05-09 0:01 ` Jesse Barnes
2003-05-09 0:11 ` Jesse Barnes
` (28 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: Jesse Barnes @ 2003-05-09 0:01 UTC (permalink / raw)
To: linux-ia64
On Thu, May 08, 2003 at 04:32:15PM -0700, David Mosberger wrote:
> Jesse> If they don't need it, would you be happy taking the simple reloc code
> Jesse> in elilo? And it seems simple enough, so even if we have to throw it
> Jesse> away, no big loss.
>
> I'm confused. Last time we talked about this, you said something
> along the lines that you couldn't find the reloc code. In any case,
> for elilo changes, you have to talk to Stephane.
Um, now I'm confused. Couldn't find what? You're right though, I
guess the only kernel change we'd need is adding '-q' to the LDFLAGS.
Thanks,
Jesse
^ permalink raw reply [flat|nested] 60+ messages in thread* Re: [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (29 preceding siblings ...)
2003-05-09 0:01 ` Jesse Barnes
@ 2003-05-09 0:11 ` Jesse Barnes
2003-05-09 17:52 ` Jesse Barnes
` (27 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: Jesse Barnes @ 2003-05-09 0:11 UTC (permalink / raw)
To: linux-ia64
On Thu, May 08, 2003 at 05:01:35PM -0700, Jesse Barnes wrote:
> On Thu, May 08, 2003 at 04:32:15PM -0700, David Mosberger wrote:
> > Jesse> If they don't need it, would you be happy taking the simple reloc code
> > Jesse> in elilo? And it seems simple enough, so even if we have to throw it
> > Jesse> away, no big loss.
> >
> > I'm confused. Last time we talked about this, you said something
> > along the lines that you couldn't find the reloc code. In any case,
> > for elilo changes, you have to talk to Stephane.
>
> Um, now I'm confused. Couldn't find what? You're right though, I
> guess the only kernel change we'd need is adding '-q' to the LDFLAGS.
Ah, now I remember--elilo failing when kernels were stripped a certain
way. No longer a problem...
Thanks,
Jesse
^ permalink raw reply [flat|nested] 60+ messages in thread* Re: [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (30 preceding siblings ...)
2003-05-09 0:11 ` Jesse Barnes
@ 2003-05-09 17:52 ` Jesse Barnes
2003-05-09 18:25 ` David Mosberger
` (26 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: Jesse Barnes @ 2003-05-09 17:52 UTC (permalink / raw)
To: linux-ia64
On Wed, May 07, 2003 at 04:24:09PM -0700, Luck, Tony wrote:
> 1) My patch (posted around October last year) which picked virtual addresses
> in the wild blue yonder (initial versions used 0xe002000000000000, later ones
> used 0xffffffff00000000) for the link address for the kernel. Elilo can load
> kernel at any suitably aligned physical address, and head.S establishes the
> mappings using itr[0] and dtr[0].
>
> pros) provided separate maps for kernel text and data, so supported kernel text
> replication too.
> cons) __pa() no longer works on kernel addresses, use new __tpa() instead.
> Some ugly runtime patching of kernel code to get physical address of
> swapper_pg_dir into the TLB miss code.
>
> 2) I think SGI are currently running a modified version of #1 without the text
> replication support, and that provides a mapping from the normal virtual
> address that the kernel is linked for (0xe000000004400000) to whatever physical
> address it was actually loaded at ... at least I think that's what Jack said.
>
> pros) simpler than my patch
>
> cons) Still needs __tpa() instead of __pa() for kernel addresses.
>
> 3) David's suggestion of boot-time relocation. Probably simplest to implement
> this in elilo, but if you are really good at PIC asm code it could be done in
> the kernel startup sequence.
>
> pros) Just like linking kernel at a new address.
> Avoids the __tpa() issue.
> Doesn't invalidate any assumptions about how to get from virtual to
> physical addresses and back again.
>
> cons) Nobody has implemented it.
doesn't address text replication concerns, while (1) and (2)
do
So, is there any consensus on the best path to pursue? Chris Wedgwood
is working on option #3, and I've got Tony's patch trimmed down to #2
(with one piece missing--ia64_switch_to runtime patching), but none of
these are in either 2.4 or 2.5 yet. Maybe for 2.4 we should do #2 or
#3 and for 2.5 we could implement #1 with the virtual offsets Tony
mentioned earlier?
Thanks,
Jesse
^ permalink raw reply [flat|nested] 60+ messages in thread* Re: [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (31 preceding siblings ...)
2003-05-09 17:52 ` Jesse Barnes
@ 2003-05-09 18:25 ` David Mosberger
2003-05-09 19:30 ` Jesse Barnes
` (25 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: David Mosberger @ 2003-05-09 18:25 UTC (permalink / raw)
To: linux-ia64
>>>>> On Fri, 9 May 2003 10:52:25 -0700, Jesse Barnes <jbarnes@sgi.com> said:
Jesse> So, is there any consensus on the best path to pursue? Chris Wedgwood
Jesse> is working on option #3, and I've got Tony's patch trimmed down to #2
Jesse> (with one piece missing--ia64_switch_to runtime patching), but none of
Jesse> these are in either 2.4 or 2.5 yet. Maybe for 2.4 we should do #2 or
Jesse> #3 and for 2.5 we could implement #1 with the virtual offsets Tony
Jesse> mentioned earlier?
I'm not sure. I got the impression Tony may be looking at the virtual
remapping in region 5. I haven't heard whether text replication
turned out to be important for 8870, but I'm starting to lean towards
virtual remapping because it is more versatile (can handle both
"strange" physical memory layouts and kernel replication). This,
coupled with the fact that it doesn't break any of the existing tools
makes it pretty compelling. Also, my primary objection about making
the kernel model more complicated doesn't hold much water if we move
everything to region 5.
Would there be a downside to this on SGI's machines?
--david
^ permalink raw reply [flat|nested] 60+ messages in thread* Re: [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (32 preceding siblings ...)
2003-05-09 18:25 ` David Mosberger
@ 2003-05-09 19:30 ` Jesse Barnes
2003-05-09 19:31 ` Jack Steiner
` (24 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: Jesse Barnes @ 2003-05-09 19:30 UTC (permalink / raw)
To: linux-ia64
On Fri, May 09, 2003 at 11:25:29AM -0700, David Mosberger wrote:
> I'm not sure. I got the impression Tony may be looking at the virtual
> remapping in region 5. I haven't heard whether text replication
> turned out to be important for 8870, but I'm starting to lean towards
> virtual remapping because it is more versatile (can handle both
> "strange" physical memory layouts and kernel replication). This,
> coupled with the fact that it doesn't break any of the existing tools
> makes it pretty compelling. Also, my primary objection about making
> the kernel model more complicated doesn't hold much water if we move
> everything to region 5.
>
> Would there be a downside to this on SGI's machines?
Not that I can see--we've been using that method for awhile now with
good success, and I'd be happy to test any 2.5 patches that implement
this feature. So I guess we should go with virtual remapping? Tony?
Thanks,
Jesse
^ permalink raw reply [flat|nested] 60+ messages in thread* Re: [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (33 preceding siblings ...)
2003-05-09 19:30 ` Jesse Barnes
@ 2003-05-09 19:31 ` Jack Steiner
2003-05-09 20:02 ` Jack Steiner
` (23 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: Jack Steiner @ 2003-05-09 19:31 UTC (permalink / raw)
To: linux-ia64
>
> On Wed, May 07, 2003 at 04:24:09PM -0700, Luck, Tony wrote:
> > 1) My patch (posted around October last year) which picked virtual addresses
> > in the wild blue yonder (initial versions used 0xe002000000000000, later ones
> > used 0xffffffff00000000) for the link address for the kernel. Elilo can load
> > kernel at any suitably aligned physical address, and head.S establishes the
> > mappings using itr[0] and dtr[0].
> >
> > pros) provided separate maps for kernel text and data, so supported kernel text
> > replication too.
> > cons) __pa() no longer works on kernel addresses, use new __tpa() instead.
> > Some ugly runtime patching of kernel code to get physical address of
> > swapper_pg_dir into the TLB miss code.
The __tpa macros are ugly but they are fully contained within the ia64 part
of the tree. (IIRC, the old scheduler had a reference but the O(1) scheduler does not).
In our tree, there are currently only 12 references to __tpa. All are
in boottime initialization code, mostly in mca.c. Although I would
rather not have __tpa, this doesn't seem too bad.
> >
> > 2) I think SGI are currently running a modified version of #1 without the text
> > replication support, and that provides a mapping from the normal virtual
> > address that the kernel is linked for (0xe000000004400000) to whatever physical
> > address it was actually loaded at ... at least I think that's what Jack said.
Right.
> >
> > pros) simpler than my patch
> >
> > cons) Still needs __tpa() instead of __pa() for kernel addresses.
> >
> > 3) David's suggestion of boot-time relocation. Probably simplest to implement
> > this in elilo, but if you are really good at PIC asm code it could be done in
> > the kernel startup sequence.
> >
> > pros) Just like linking kernel at a new address.
> > Avoids the __tpa() issue.
> > Doesn't invalidate any assumptions about how to get from virtual to
> > physical addresses and back again.
> >
> > cons) Nobody has implemented it.
> doesn't address text replication concerns, while (1) and (2)
> do
>
> So, is there any consensus on the best path to pursue? Chris Wedgwood
> is working on option #3, and I've got Tony's patch trimmed down to #2
> (with one piece missing--ia64_switch_to runtime patching), but none of
> these are in either 2.4 or 2.5 yet. Maybe for 2.4 we should do #2 or
> #3 and for 2.5 we could implement #1 with the virtual offsets Tony
> mentioned earlier?
As far as I can tell, #1 is the only solution that will support kernel text replication.
We did experiments to measure the effect of kernel text replication. On AIM7,
(granted, not the best benchmark), we see a small but consistent
improvement in performance with text replication enabled. (Data attached below).
The biggest difference is cputime, not throughput.
This was from a relatively small system with very good remote-to-local latency
ratios. As system sizes increase, I expect the benefit of text replication will
increase.
I think that whatever solution is adopted, it needs to accommodate text replication.
If we are going to do #1 for 2.5, it seems like #2 is the best solution for 2.4.
#3 requires changes to elilo & the way System.map is used. These changes won't apply
to 2.5. #3 also will introduce some confusion since the System.map that is
generated at build time can't be used without knowing the physical address
where elilo actually loaded the kernel. The address could change based on minor
changes in the configuration (amount of memory, devices, nodes, etc).
------
AIM7
Kernel Text replication:
Tasks Jobs/Min JTI Real CPU Jobs/sec/task
200 18110.2 84 64.3 619.6 1.5092
500 23932.5 76 121.6 2947.1 0.7977
1000 24402.0 71 238.5 9890.6 0.4067
2000 23895.4 71 487.1 25537.8 0.1991
3000 22848.8 75 764.2 42549.7 0.1269
4000 20420.4 78 1140.0 64237.4 0.0851
Baseline
Tasks Jobs/Min JTI Real CPU Jobs/sec/task
200 18008.8 84 64.6 624.1 1.5007
500 23102.2 76 126.0 4238.1 0.7701
1000 23886.4 71 243.7 10507.3 0.3981
2000 23669.9 72 491.8 26120.2 0.1972
3000 22344.9 75 781.4 42965.0 0.1241
4000 20256.9 78 1149.2 64711.0 0.0844
>
> Thanks,
> Jesse
>
> _______________________________________________
> Linux-IA64 mailing list
> Linux-IA64@linuxia64.org
> http://lists.linuxia64.org/lists/listinfo/linux-ia64
>
--
Thanks
Jack Steiner (651-683-5302) (vnet 233-5302) steiner@sgi.com
^ permalink raw reply [flat|nested] 60+ messages in thread* Re: [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (34 preceding siblings ...)
2003-05-09 19:31 ` Jack Steiner
@ 2003-05-09 20:02 ` Jack Steiner
2003-05-09 20:25 ` David Mosberger
` (22 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: Jack Steiner @ 2003-05-09 20:02 UTC (permalink / raw)
To: linux-ia64
>
> >>>>> On Fri, 9 May 2003 10:52:25 -0700, Jesse Barnes <jbarnes@sgi.com> said:
>
> Jesse> So, is there any consensus on the best path to pursue? Chris Wedgwood
> Jesse> is working on option #3, and I've got Tony's patch trimmed down to #2
> Jesse> (with one piece missing--ia64_switch_to runtime patching), but none of
> Jesse> these are in either 2.4 or 2.5 yet. Maybe for 2.4 we should do #2 or
> Jesse> #3 and for 2.5 we could implement #1 with the virtual offsets Tony
> Jesse> mentioned earlier?
>
> I'm not sure. I got the impression Tony may be looking at the virtual
> remapping in region 5. I haven't heard whether text replication
> turned out to be important for 8870, but I'm starting to lean towards
> virtual remapping because it is more versatile (can handle both
> "strange" physical memory layouts and kernel replication). This,
> coupled with the fact that it doesn't break any of the existing tools
> makes it pretty compelling. Also, my primary objection about making
> the kernel model more complicated doesn't hold much water if we move
> everything to region 5.
>
> Would there be a downside to this on SGI's machines?
I dont see any significant problems. It actually seems easy.
I think we still need to use __tpa() for addresses assigned by the loader.
The standard __pa() macros wont work in region 5.
I dont have any objections to __tpa. We have had them in the kernel
since ~2.3.42 & have not had any problems with them. On occasion, when
we upgrade, we have to add/delete a couple of references but these are
always easy to find. I dont recall any changes for the last couple of upgrades
but maybe we were just lucky.
As I mentioned in my earlier mail,
>> The __tpa macros are ugly but they are fully contained within the ia64 part
>> of the tree. (IIRC, the old scheduler had a reference but the O(1) scheduler does not).
>> In our tree, there are currently only 12 references to __tpa. All are
>> in boottime initialization code, mostly in mca.c. Although I would
>> rather not have __tpa, this doesnt seem too bad.
>
> --david
>
> _______________________________________________
> Linux-IA64 mailing list
> Linux-IA64@linuxia64.org
> http://lists.linuxia64.org/lists/listinfo/linux-ia64
>
--
Thanks
Jack Steiner (651-683-5302) (vnet 233-5302) steiner@sgi.com
^ permalink raw reply [flat|nested] 60+ messages in thread* Re: [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (35 preceding siblings ...)
2003-05-09 20:02 ` Jack Steiner
@ 2003-05-09 20:25 ` David Mosberger
2003-05-09 21:43 ` Luck, Tony
` (21 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: David Mosberger @ 2003-05-09 20:25 UTC (permalink / raw)
To: linux-ia64
>>>>> On Fri, 9 May 2003 15:02:47 -0500 (CDT), Jack Steiner <steiner@sgi.com> said:
>> Would there be a downside to this on SGI's machines?
Jack> I dont see any significant problems. It actually seems easy.
Good.
Jack> I think we still need to use __tpa() for addresses assigned by
Jack> the loader. The standard __pa() macros wont work in region 5.
That's why I don't mind: nobody can use __pa() on region 5 anyhow, so
using something different (page table walk or __tpa()) is OK. I was
real uneasy with having a "magic" address range inside region 7, where
__pa() wouldn't work. But in region 5, no problem.
It might be good to call __tpa() something different: it's too close
to __pa() and the difference may easily be overlooked. For example,
ia64_tpa() would make it obvious that we're talking about an ia64
instruction.
--david
^ permalink raw reply [flat|nested] 60+ messages in thread* RE: [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (36 preceding siblings ...)
2003-05-09 20:25 ` David Mosberger
@ 2003-05-09 21:43 ` Luck, Tony
2003-05-10 2:39 ` Jack Steiner
` (20 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: Luck, Tony @ 2003-05-09 21:43 UTC (permalink / raw)
To: linux-ia64
> Not that I can see--we've been using that method for awhile now with
> good success, and I'd be happy to test any 2.5 patches that implement
> this feature. So I guess we should go with virtual remapping? Tony?
I started looking at porting the most recent version of my patch
(last seen around 2.5.39 ... only has the relocation, but once that
is in, adding kernel text replication is a relatively simple evolutionary
step).
I've been teaching a class all morning, and part of the afternoon, so
no demonstrable progress ... but most of the merge errors have been
cleaned up. Now looking at the last (and biggest) reject in head.S.
-Tony
^ permalink raw reply [flat|nested] 60+ messages in thread* Re: [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (37 preceding siblings ...)
2003-05-09 21:43 ` Luck, Tony
@ 2003-05-10 2:39 ` Jack Steiner
2003-05-13 22:18 ` Luck, Tony
` (19 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: Jack Steiner @ 2003-05-10 2:39 UTC (permalink / raw)
To: linux-ia64
>
> >>>>> On Fri, 9 May 2003 15:02:47 -0500 (CDT), Jack Steiner <steiner@sgi.com> said:
>
> >> Would there be a downside to this on SGI's machines?
>
> Jack> I dont see any significant problems. It actually seems easy.
>
> Good.
>
> Jack> I think we still need to use __tpa() for addresses assigned by
> Jack> the loader. The standard __pa() macros wont work in region 5.
>
> That's why I don't mind: nobody can use __pa() on region 5 anyhow, so
> using something different (page table walk or __tpa()) is OK. I was
> real uneasy with having a "magic" address range inside region 7, where
> __pa() wouldn't work. But in region 5, no problem.
>
> It might be good to call __tpa() something different: it's too close
> to __pa() and the difference may easily be overlooked. For example,
> ia64_tpa() would make it obvious that we're talking about an ia64
> instruction.
Seems reasonable.....
>
> --david
>
> _______________________________________________
> Linux-IA64 mailing list
> Linux-IA64@linuxia64.org
> http://lists.linuxia64.org/lists/listinfo/linux-ia64
>
--
Thanks
Jack Steiner (651-683-5302) (vnet 233-5302) steiner@sgi.com
^ permalink raw reply [flat|nested] 60+ messages in thread* RE: [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (38 preceding siblings ...)
2003-05-10 2:39 ` Jack Steiner
@ 2003-05-13 22:18 ` Luck, Tony
2003-05-14 1:24 ` Jesse Barnes
` (18 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: Luck, Tony @ 2003-05-13 22:18 UTC (permalink / raw)
To: linux-ia64
[-- Attachment #1: Type: text/plain, Size: 3522 bytes --]
Here's a proof of concept patch (against 2.5.67) that covers
most of the features we've been talking about in this thread.
This builds and boots both UP and SMP on a Tiger.
Kernel text and data are linked into region 5 at address
0xA000000100000000, leaving the bottom 4G of region 5 available
for miscellaneous stuff as suggested by David. It will be possible
to extend this patch to split text and data into separate sections
to allow kernel text replication, but I left that out for now to
keep this patch to the bare minimum.
This version still uses the __tpa() macro, but converting it to
use the ia64_tpa() inline function is just a matter of getting
all the types (argument and return value) so that the compiler
doesn't spit warnings all over the screen during build.
This patch doesn't fixup fs/proc/kcore.c (my old patch didn't
apply cleanly ... and much of the changes it would have made are
no longer needed as kernel is now at a lower address than modules).
Here's a file by file description of the changes:
arch/ia64/kernel/efi_stub.S
calls to ia64_switch_mode replaced by calls to
ia64_switch_mode_phys or ia64_switch_mode_virt
as appropriate
arch/ia64/kernel/entry.S
ia64_switch_to() doesn't need to test whether stack
overlaps DTR[0] (it can't, kernel and stack are in
different regions now).
arch/ia64/kernel/head.S
Initialize all the kernel region registers here (we
now have to do rr[5] and rr[7], so I moved rr[6] up
to the top and dropped the EARLY_PRINTK #ifdef for
symmetry). Map itr[0]/dtr[0] based on actual load
address. Map dtr[2] to the region 7 address of the
stack, and set IA64_KR(CURRENT_STACK) to correct
granule number.
Replace ia64_switch_mode() from a function that toggles
virtual mode on/off with separate functions to go from
virt to phys mode, and from phys to virt mode.
arch/ia64/kernel/ia64_ksyms.c
Export zero_page_memmap_ptr
arch/ia64/kernel/ivt.S
Provide labels on code that needs to be patched with the
physical address of swapper_pg_dir
arch/ia64/kernel/mca.c
Lots of __pa() need to be __tpa()
arch/ia64/kernel/pal.S
calls to ia64_switch_mode replaced by calls to
ia64_switch_mode_phys or ia64_switch_mode_virt
as appropriate
arch/ia64/kernel/setup.c
Get correct page numbers when marking start/end of kernel.
Code to patch ivt.S with physical address of swapper_pg_dir.
Pass region 7 virtual address of per-cpu area to
ia64_mmu_init() [UP code would have passed a region 5 kernel
address, which chokes the __pa() call in ia64_mmu_init()]
arch/ia64/kernel/smpboot.c
Use __tpa() to get physical address of entry point to start
other cpus
arch/ia64/mm/init.c
free_initmem() needs to be smarter about addresses
initialization of rr[5] and rr[6] moved to head.S
setup zero_page_memmap_ptr
arch/ia64/vmlinux.lds.S
Set LOAD_OFFSET to a sane default value for DIG machines,
kernel loaded at 64MB ... on machines where this patch is
really needed, elilo will have to ignore the load address
and put the kernel anyplace there is suitably aligned memory.
Use LOAD_OFFSET rather than PAGE_OFFSET throughout the rest
of the file.
include/asm-ia64/page.h
define __tpa() and __imva() macros
include/asm-ia64/pgtable.h
Move VMALLOC_START up above kernel (region 5 base + 8GB)
New definition of ZERO_PAGE uses zero_page_memmap_ptr
Drop KERNEL_TR_PAGE_NUM define, it is meaningless now.
include/asm-ia64/system.h
KERNEL_START moves to region 5 base + 4GB
[-- Attachment #2: reloc.diff --]
[-- Type: application/octet-stream, Size: 26636 bytes --]
diff -ru l2567-ref/arch/ia64/kernel/efi_stub.S l2567-reloc/arch/ia64/kernel/efi_stub.S
--- l2567-ref/arch/ia64/kernel/efi_stub.S Mon Apr 7 10:30:33 2003
+++ l2567-reloc/arch/ia64/kernel/efi_stub.S Thu May 8 13:51:25 2003
@@ -62,7 +62,7 @@
mov b6=r2
;;
andcm r16=loc3,r16 // get psr with IT, DT, and RT bits cleared
- br.call.sptk.many rp=ia64_switch_mode
+ br.call.sptk.many rp=ia64_switch_mode_phys
.ret0: mov out4=in5
mov out0=in1
mov out1=in2
@@ -73,7 +73,7 @@
br.call.sptk.many rp=b6 // call the EFI function
.ret1: mov ar.rsc=0 // put RSE in enforced lazy, LE mode
mov r16=loc3
- br.call.sptk.many rp=ia64_switch_mode // return to virtual mode
+ br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
.ret2: mov ar.rsc=loc4 // restore RSE configuration
mov ar.pfs=loc1
mov rp=loc0
diff -ru l2567-ref/arch/ia64/kernel/entry.S l2567-reloc/arch/ia64/kernel/entry.S
--- l2567-ref/arch/ia64/kernel/entry.S Thu May 1 10:19:28 2003
+++ l2567-reloc/arch/ia64/kernel/entry.S Fri May 9 15:21:15 2003
@@ -178,15 +178,12 @@
;;
st8 [r22]=sp // save kernel stack pointer of old task
shr.u r26=r20,IA64_GRANULE_SHIFT
- shr.u r17=r20,KERNEL_TR_PAGE_SHIFT
- ;;
- cmp.ne p6,p7=KERNEL_TR_PAGE_NUM,r17
adds r21=IA64_TASK_THREAD_KSP_OFFSET,in0
;;
/*
* If we've already mapped this task's page, we can skip doing it again.
*/
-(p6) cmp.eq p7,p6=r26,r27
+ cmp.eq p7,p6=r26,r27
(p6) br.cond.dpnt .map
;;
.done:
diff -ru l2567-ref/arch/ia64/kernel/head.S l2567-reloc/arch/ia64/kernel/head.S
--- l2567-ref/arch/ia64/kernel/head.S Thu May 1 10:19:28 2003
+++ l2567-reloc/arch/ia64/kernel/head.S Tue May 13 11:05:51 2003
@@ -60,22 +60,42 @@
mov r4=r0
.body
- /*
- * Initialize the region register for region 7 and install a translation register
- * that maps the kernel's text and data:
- */
rsm psr.i | psr.ic
- mov r16=((ia64_rid(IA64_REGION_ID_KERNEL, PAGE_OFFSET) << 8) | (IA64_GRANULE_SHIFT << 2))
;;
srlz.i
+ ;;
+ /*
+ * Initialize kernel region registers:
+ * rr[5]: VHPT enabled, page size = PAGE_SHIFT
+ * rr[6]: VHPT disabled, page size = IA64_GRANULE_SHIFT
+ * rr[7]: VHPT disabled, page size = IA64_GRANULE_SHIFT
+ */
+ mov r16=((ia64_rid(IA64_REGION_ID_KERNEL, (5<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
+ movl r17=(5<<61)
+ mov r18=((ia64_rid(IA64_REGION_ID_KERNEL, (6<<61)) << 8) | (IA64_GRANULE_SHIFT << 2))
+ movl r19=(6<<61)
+ mov r20=((ia64_rid(IA64_REGION_ID_KERNEL, (7<<61)) << 8) | (IA64_GRANULE_SHIFT << 2))
+ movl r21=(7<<61)
+ ;;
+ mov rr[r17]=r16
+ mov rr[r19]=r18
+ mov rr[r21]=r20
+ ;;
+ /*
+ * Now pin mappings into the TLB for kernel text and data
+ */
mov r18=KERNEL_TR_PAGE_SHIFT<<2
movl r17=KERNEL_START
;;
- mov rr[r17]=r16
mov cr.itir=r18
mov cr.ifa=r17
mov r16=IA64_TR_KERNEL
- movl r18=((1 << KERNEL_TR_PAGE_SHIFT) | PAGE_KERNEL)
+ mov r3=ip
+ movl r18=PAGE_KERNEL
+ ;;
+ dep r2=0,r3,0,KERNEL_TR_PAGE_SHIFT
+ ;;
+ or r18=r2,r18
;;
srlz.i
;;
@@ -113,16 +133,6 @@
mov ar.fpsr=r2
;;
-#ifdef CONFIG_IA64_EARLY_PRINTK
- mov r3=(6<<8) | (IA64_GRANULE_SHIFT<<2)
- movl r2=6<<61
- ;;
- mov rr[r2]=r3
- ;;
- srlz.i
- ;;
-#endif
-
#define isAP p2 // are we an Application Processor?
#define isBP p3 // are we the Bootstrap Processor?
@@ -143,12 +153,36 @@
movl r2=init_thread_union
cmp.eq isBP,isAP=r0,r0
#endif
- mov r16=KERNEL_TR_PAGE_NUM
;;
+ tpa r3=r2 // r3 == phys addr of task struct
+ // load mapping for stack (virtaddr in r2, physaddr in r3)
+ rsm psr.ic
+ movl r17=PAGE_KERNEL
+ ;;
+ srlz.d
+ dep r18=0,r3,0,12
+ ;;
+ or r18=r17,r18
+ dep r2=-1,r3,61,3 // IMVA of task
+ ;;
+ mov r17=rr[r2]
+ shr.u r16=r3,IA64_GRANULE_SHIFT
+ ;;
+ dep r17=0,r17,8,24
+ ;;
+ mov cr.itir=r17
+ mov cr.ifa=r2
+
+ mov r19=IA64_TR_CURRENT_STACK
+ ;;
+ itr.d dtr[r19]=r18
+ ;;
+ ssm psr.ic
+ srlz.d
+ ;;
// load the "current" pointer (r13) and ar.k6 with the current task
mov IA64_KR(CURRENT)=r2 // virtual address
- // initialize k4 to a safe value (64-128MB is mapped by TR_KERNEL)
mov IA64_KR(CURRENT_STACK)=r16
mov r13=r2
/*
@@ -665,14 +699,14 @@
END(__ia64_init_fpu)
/*
- * Switch execution mode from virtual to physical or vice versa.
+ * Switch execution mode from virtual to physical
*
* Inputs:
* r16 = new psr to establish
*
* Note: RSE must already be in enforced lazy mode
*/
-GLOBAL_ENTRY(ia64_switch_mode)
+GLOBAL_ENTRY(ia64_switch_mode_phys)
{
alloc r2=ar.pfs,0,0,0,0
rsm psr.i | psr.ic // disable interrupts and interrupt collection
@@ -682,35 +716,86 @@
{
flushrs // must be first insn in group
srlz.i
- shr.u r19=r15,61 // r19 <- top 3 bits of current IP
}
;;
mov cr.ipsr=r16 // set new PSR
- add r3=1f-ia64_switch_mode,r15
- xor r15=0x7,r19 // flip the region bits
+ add r3=1f-ia64_switch_mode_phys,r15
mov r17=ar.bsp
mov r14=rp // get return address into a general register
+ ;;
- // switch RSE backing store:
+ // going to physical mode, use tpa to translate virt->phys
+ tpa r17=r17
+ tpa r3=r3
+ tpa sp=sp
+ tpa r14=r14
;;
- dep r17=r15,r17,61,3 // make ar.bsp physical or virtual
+
mov r18=ar.rnat // save ar.rnat
- ;;
mov ar.bspstore=r17 // this steps on ar.rnat
- dep r3=r15,r3,61,3 // make rfi return address physical or virtual
+ mov cr.iip=r3
+ mov cr.ifs=r0
;;
+ mov ar.rnat=r18 // restore ar.rnat
+ rfi // must be last insn in group
+ ;;
+1: mov rp=r14
+ br.ret.sptk.many rp
+END(ia64_switch_mode_phys)
+
+/*
+ * Switch execution mode from physical to virtual
+ *
+ * Inputs:
+ * r16 = new psr to establish
+ *
+ * Note: RSE must already be in enforced lazy mode
+ */
+GLOBAL_ENTRY(ia64_switch_mode_virt)
+ {
+ alloc r2=ar.pfs,0,0,0,0
+ rsm psr.i | psr.ic // disable interrupts and interrupt collection
+ mov r15=ip
+ }
+ ;;
+ {
+ flushrs // must be first insn in group
+ srlz.i
+ }
+ ;;
+ mov cr.ipsr=r16 // set new PSR
+ add r3=1f-ia64_switch_mode_virt,r15
+
+ mov r17=ar.bsp
+ mov r14=rp // get return address into a general register
+ ;;
+
+ // going to virtual
+ // - for code addresses, set upper bits of addr to KERNEL_START
+ // - for stack addresses, set upper 3 bits to 0xe.... Don't change any of the
+ // lower bits since we want it to stay identity mapped
+ movl r18=KERNEL_START
+ dep r3=0,r3,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT
+ dep r14=0,r14,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT
+ dep r17=-1,r17,61,3
+ dep sp=-1,sp,61,3
+ ;;
+ or r3=r3,r18
+ or r14=r14,r18
+ ;;
+
+ mov r18=ar.rnat // save ar.rnat
+ mov ar.bspstore=r17 // this steps on ar.rnat
mov cr.iip=r3
mov cr.ifs=r0
- dep sp=r15,sp,61,3 // make stack pointer physical or virtual
;;
mov ar.rnat=r18 // restore ar.rnat
- dep r14=r15,r14,61,3 // make function return address physical or virtual
rfi // must be last insn in group
;;
1: mov rp=r14
br.ret.sptk.many rp
-END(ia64_switch_mode)
+END(ia64_switch_mode_virt)
#ifdef CONFIG_IA64_BRL_EMU
diff -ru l2567-ref/arch/ia64/kernel/ia64_ksyms.c l2567-reloc/arch/ia64/kernel/ia64_ksyms.c
--- l2567-ref/arch/ia64/kernel/ia64_ksyms.c Thu May 1 10:19:28 2003
+++ l2567-reloc/arch/ia64/kernel/ia64_ksyms.c Fri May 9 15:21:25 2003
@@ -146,6 +146,7 @@
EXPORT_SYMBOL(ia64_mv);
#endif
EXPORT_SYMBOL(machvec_noop);
+EXPORT_SYMBOL(zero_page_memmap_ptr);
#ifdef CONFIG_PERFMON
#include <asm/perfmon.h>
EXPORT_SYMBOL(pfm_install_alternate_syswide_subsystem);
diff -ru l2567-ref/arch/ia64/kernel/ivt.S l2567-reloc/arch/ia64/kernel/ivt.S
--- l2567-ref/arch/ia64/kernel/ivt.S Mon Apr 7 10:30:43 2003
+++ l2567-reloc/arch/ia64/kernel/ivt.S Mon May 12 13:28:44 2003
@@ -122,8 +122,13 @@
shr.u r18=r22,PGDIR_SHIFT // get bits 33-63 of the faulting address
;;
(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place
+ .global ia64_ivt_patch1
+ia64_ivt_patch1:
+{ .mlx // we patch this bundle to include physical address of swapper_pg_dir
srlz.d // ensure "rsm psr.dt" has taken effect
-(p6) movl r19=__pa(swapper_pg_dir) // region 5 is rooted at swapper_pg_dir
+(p6) movl r19=swapper_pg_dir // region 5 is rooted at swapper_pg_dir
+}
+ .pred.rel "mutex", p6, p7
(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT
(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3
;;
@@ -415,8 +420,13 @@
shr.u r18=r16,PGDIR_SHIFT // get bits 33-63 of faulting address
;;
(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place
- srlz.d
-(p6) movl r19=__pa(swapper_pg_dir) // region 5 is rooted at swapper_pg_dir
+ .global ia64_ivt_patch2
+ia64_ivt_patch2:
+{ .mlx // we patch this bundle to include physical address of swapper_pg_dir
+ srlz.d // ensure "rsm psr.dt" has taken effect
+(p6) movl r19=swapper_pg_dir // region 5 is rooted at swapper_pg_dir
+}
+ .pred.rel "mutex", p6, p7
(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT
(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3
;;
diff -ru l2567-ref/arch/ia64/kernel/mca.c l2567-reloc/arch/ia64/kernel/mca.c
--- l2567-ref/arch/ia64/kernel/mca.c Thu May 1 10:19:28 2003
+++ l2567-reloc/arch/ia64/kernel/mca.c Fri May 9 15:56:51 2003
@@ -641,17 +641,17 @@
IA64_MCA_DEBUG("ia64_mca_init: registered mca rendezvous spinloop and wakeup mech.\n");
- ia64_mc_info.imi_mca_handler = __pa(mca_hldlr_ptr->fp);
+ ia64_mc_info.imi_mca_handler = __tpa(mca_hldlr_ptr->fp);
/*
* XXX - disable SAL checksum by setting size to 0; should be
- * __pa(ia64_os_mca_dispatch_end) - __pa(ia64_os_mca_dispatch);
+ * __tpa(ia64_os_mca_dispatch_end) - __tpa(ia64_os_mca_dispatch);
*/
ia64_mc_info.imi_mca_handler_size = 0;
/* Register the os mca handler with SAL */
if ((rc = ia64_sal_set_vectors(SAL_VECTOR_OS_MCA,
ia64_mc_info.imi_mca_handler,
- mca_hldlr_ptr->gp,
+ __tpa(mca_hldlr_ptr->gp),
ia64_mc_info.imi_mca_handler_size,
0, 0, 0)))
{
@@ -661,15 +661,15 @@
}
IA64_MCA_DEBUG("ia64_mca_init: registered os mca handler with SAL at 0x%lx, gp = 0x%lx\n",
- ia64_mc_info.imi_mca_handler, mca_hldlr_ptr->gp);
+ ia64_mc_info.imi_mca_handler, __tpa(mca_hldlr_ptr->gp));
/*
* XXX - disable SAL checksum by setting size to 0, should be
* IA64_INIT_HANDLER_SIZE
*/
- ia64_mc_info.imi_monarch_init_handler = __pa(mon_init_ptr->fp);
+ ia64_mc_info.imi_monarch_init_handler = __tpa(mon_init_ptr->fp);
ia64_mc_info.imi_monarch_init_handler_size = 0;
- ia64_mc_info.imi_slave_init_handler = __pa(slave_init_ptr->fp);
+ ia64_mc_info.imi_slave_init_handler = __tpa(slave_init_ptr->fp);
ia64_mc_info.imi_slave_init_handler_size = 0;
IA64_MCA_DEBUG("ia64_mca_init: os init handler at %lx\n",
@@ -678,10 +678,10 @@
/* Register the os init handler with SAL */
if ((rc = ia64_sal_set_vectors(SAL_VECTOR_OS_INIT,
ia64_mc_info.imi_monarch_init_handler,
- __pa(ia64_get_gp()),
+ __tpa(ia64_get_gp()),
ia64_mc_info.imi_monarch_init_handler_size,
ia64_mc_info.imi_slave_init_handler,
- __pa(ia64_get_gp()),
+ __tpa(ia64_get_gp()),
ia64_mc_info.imi_slave_init_handler_size)))
{
printk(KERN_ERR "ia64_mca_init: Failed to register m/s init handlers with SAL. "
diff -ru l2567-ref/arch/ia64/kernel/pal.S l2567-reloc/arch/ia64/kernel/pal.S
--- l2567-ref/arch/ia64/kernel/pal.S Mon Apr 7 10:32:57 2003
+++ l2567-reloc/arch/ia64/kernel/pal.S Fri May 9 15:24:27 2003
@@ -164,7 +164,7 @@
;;
mov loc4=ar.rsc // save RSE configuration
dep.z loc2=loc2,0,61 // convert pal entry point to physical
- dep.z r8=r8,0,61 // convert rp to physical
+ tpa r8=r8 // convert rp to physical
;;
mov b7 = loc2 // install target to branch reg
mov ar.rsc=0 // put RSE in enforced lazy, LE mode
@@ -174,13 +174,13 @@
or loc3=loc3,r17 // add in psr the bits to set
;;
andcm r16=loc3,r16 // removes bits to clear from psr
- br.call.sptk.many rp=ia64_switch_mode
+ br.call.sptk.many rp=ia64_switch_mode_phys
.ret1: mov rp = r8 // install return address (physical)
br.cond.sptk.many b7
1:
mov ar.rsc=0 // put RSE in enforced lazy, LE mode
mov r16=loc3 // r16= original psr
- br.call.sptk.many rp=ia64_switch_mode // return to virtual mode
+ br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
.ret2:
mov psr.l = loc3 // restore init PSR
@@ -228,13 +228,13 @@
mov b7 = loc2 // install target to branch reg
;;
andcm r16=loc3,r16 // removes bits to clear from psr
- br.call.sptk.many rp=ia64_switch_mode
+ br.call.sptk.many rp=ia64_switch_mode_phys
.ret6:
br.call.sptk.many rp=b7 // now make the call
.ret7:
mov ar.rsc=0 // put RSE in enforced lazy, LE mode
mov r16=loc3 // r16= original psr
- br.call.sptk.many rp=ia64_switch_mode // return to virtual mode
+ br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
.ret8: mov psr.l = loc3 // restore init PSR
mov ar.pfs = loc1
diff -ru l2567-ref/arch/ia64/kernel/setup.c l2567-reloc/arch/ia64/kernel/setup.c
--- l2567-ref/arch/ia64/kernel/setup.c Thu May 1 10:19:28 2003
+++ l2567-reloc/arch/ia64/kernel/setup.c Tue May 13 11:26:12 2003
@@ -286,8 +286,8 @@
+ strlen(__va(ia64_boot_param->command_line)) + 1);
n++;
- rsvd_region[n].start = KERNEL_START;
- rsvd_region[n].end = KERNEL_END;
+ rsvd_region[n].start = __imva(KERNEL_START);
+ rsvd_region[n].end = __imva(KERNEL_END);
n++;
#ifdef CONFIG_BLK_DEV_INITRD
@@ -347,6 +347,47 @@
#endif
}
+/*
+ * There are two places in the performance critical path of
+ * the exception handling code where we need to know the physical
+ * address of the swapper_pg_dir structure. This routine
+ * patches the "movl" instructions to load the value needed.
+ */
+static void __init
+patch_ivt_with_phys_swapper_pg_dir(void)
+{
+ extern char ia64_ivt_patch1[], ia64_ivt_patch2[];
+ unsigned long spd = __tpa(swapper_pg_dir);
+ unsigned long *p;
+
+ p = (unsigned long *)__imva(ia64_ivt_patch1);
+
+ *p = (*p & 0x3fffffffffffUL) |
+ ((spd & 0x000000ffffc00000UL)<<24);
+ p++;
+ *p = (*p & 0xf000080fff800000UL) |
+ ((spd & 0x8000000000000000UL) >> 4) |
+ ((spd & 0x7fffff0000000000UL) >> 40) |
+ ((spd & 0x00000000001f0000UL) << 29) |
+ ((spd & 0x0000000000200000UL) << 23) |
+ ((spd & 0x000000000000ff80UL) << 43) |
+ ((spd & 0x000000000000007fUL) << 36);
+
+ p = (unsigned long *)__imva(ia64_ivt_patch2);
+
+ *p = (*p & 0x3fffffffffffUL) |
+ ((spd & 0x000000ffffc00000UL)<<24);
+ p++;
+ *p = (*p & 0xf000080fff800000UL) |
+ ((spd & 0x8000000000000000UL) >> 4) |
+ ((spd & 0x7fffff0000000000UL) >> 40) |
+ ((spd & 0x00000000001f0000UL) << 29) |
+ ((spd & 0x0000000000200000UL) << 23) |
+ ((spd & 0x000000000000ff80UL) << 43) |
+ ((spd & 0x000000000000007fUL) << 36);
+}
+#define PATCH_IVT() patch_ivt_with_phys_swapper_pg_dir()
+
void __init
setup_arch (char **cmdline_p)
{
@@ -355,6 +396,8 @@
unw_init();
+ PATCH_IVT();
+
*cmdline_p = __va(ia64_boot_param->command_line);
strncpy(saved_command_line, *cmdline_p, sizeof(saved_command_line));
saved_command_line[COMMAND_LINE_SIZE-1] = '\0'; /* for safety */
@@ -715,7 +758,7 @@
if (current->mm)
BUG();
- ia64_mmu_init(cpu_data);
+ ia64_mmu_init((void *)__imva(cpu_data));
#ifdef CONFIG_IA32_SUPPORT
/* initialize global ia32 state - CR0 and CR4 */
diff -ru l2567-ref/arch/ia64/kernel/smpboot.c l2567-reloc/arch/ia64/kernel/smpboot.c
--- l2567-ref/arch/ia64/kernel/smpboot.c Thu May 1 10:19:28 2003
+++ l2567-reloc/arch/ia64/kernel/smpboot.c Fri May 9 15:56:33 2003
@@ -574,7 +574,7 @@
/* Tell SAL where to drop the AP's. */
ap_startup = (struct fptr *) start_ap;
sal_ret = ia64_sal_set_vectors(SAL_VECTOR_OS_BOOT_RENDEZ,
- __pa(ap_startup->fp), __pa(ap_startup->gp), 0, 0, 0, 0);
+ __tpa(ap_startup->fp), __tpa(ap_startup->gp), 0, 0, 0, 0);
if (sal_ret < 0)
printk(KERN_ERR "SMP: Can't set SAL AP Boot Rendezvous: %s\n",
ia64_sal_strerror(sal_ret));
diff -ru l2567-ref/arch/ia64/mm/init.c l2567-reloc/arch/ia64/mm/init.c
--- l2567-ref/arch/ia64/mm/init.c Thu May 1 10:19:28 2003
+++ l2567-reloc/arch/ia64/mm/init.c Tue May 13 11:22:40 2003
@@ -47,6 +47,8 @@
static int pgt_cache_water[2] = { 25, 50 };
+struct page *zero_page_memmap_ptr; /* map entry for zero page */
+
void
check_pgt_cache (void)
{
@@ -112,14 +114,16 @@
void
free_initmem (void)
{
- unsigned long addr;
+ unsigned long addr, eaddr;
- addr = (unsigned long) &__init_begin;
- for (; addr < (unsigned long) &__init_end; addr += PAGE_SIZE) {
+ addr = (unsigned long)__imva(&__init_begin);
+ eaddr = (unsigned long)__imva(&__init_end);
+ while (addr < eaddr) {
ClearPageReserved(virt_to_page(addr));
set_page_count(virt_to_page(addr), 1);
free_page(addr);
++totalram_pages;
+ addr += PAGE_SIZE;
}
printk(KERN_INFO "Freeing unused kernel memory: %ldkB freed\n",
(&__init_end - &__init_begin) >> 10);
@@ -270,7 +274,7 @@
void __init
ia64_mmu_init (void *my_cpu_data)
{
- unsigned long psr, rid, pta, impl_va_bits;
+ unsigned long psr, pta, impl_va_bits;
extern void __init tlb_init (void);
#ifdef CONFIG_DISABLE_VHPT
# define VHPT_ENABLE_BIT 0
@@ -278,21 +282,8 @@
# define VHPT_ENABLE_BIT 1
#endif
- /*
- * Set up the kernel identity mapping for regions 6 and 5. The mapping for region
- * 7 is setup up in _start().
- */
+ /* Pin mapping for percpu area into TLB */
psr = ia64_clear_ic();
-
- rid = ia64_rid(IA64_REGION_ID_KERNEL, __IA64_UNCACHED_OFFSET);
- ia64_set_rr(__IA64_UNCACHED_OFFSET, (rid << 8) | (IA64_GRANULE_SHIFT << 2));
-
- rid = ia64_rid(IA64_REGION_ID_KERNEL, VMALLOC_START);
- ia64_set_rr(VMALLOC_START, (rid << 8) | (PAGE_SHIFT << 2) | 1);
-
- /* ensure rr6 is up-to-date before inserting the PERCPU_ADDR translation: */
- ia64_srlz_d();
-
ia64_itr(0x2, IA64_TR_PERCPU_DATA, PERCPU_ADDR,
pte_val(pfn_pte(__pa(my_cpu_data) >> PAGE_SHIFT, PAGE_KERNEL)),
PERCPU_PAGE_SHIFT);
@@ -495,6 +486,7 @@
discontig_paging_init();
efi_memmap_walk(count_pages, &num_physpages);
+ zero_page_memmap_ptr = virt_to_page(__imva(empty_zero_page));
}
#else /* !CONFIG_DISCONTIGMEM */
void
@@ -567,6 +559,7 @@
}
free_area_init(zones_size);
# endif /* !CONFIG_VIRTUAL_MEM_MAP */
+ zero_page_memmap_ptr = virt_to_page(__imva(empty_zero_page));
}
#endif /* !CONFIG_DISCONTIGMEM */
@@ -637,7 +630,7 @@
pgt_cache_water[1] = num_pgt_pages;
/* install the gate page in the global page table: */
- put_gate_page(virt_to_page(__start_gate_section), GATE_ADDR);
+ put_gate_page(virt_to_page(__imva(__start_gate_section)), GATE_ADDR);
#ifdef CONFIG_IA32_SUPPORT
ia32_gdt_init();
diff -ru l2567-ref/arch/ia64/vmlinux.lds.S l2567-reloc/arch/ia64/vmlinux.lds.S
--- l2567-ref/arch/ia64/vmlinux.lds.S Mon Apr 7 10:32:27 2003
+++ l2567-reloc/arch/ia64/vmlinux.lds.S Tue May 13 08:44:27 2003
@@ -3,8 +3,9 @@
#include <asm/cache.h>
#include <asm/ptrace.h>
#include <asm/system.h>
+#include <asm/pgtable.h>
-#define LOAD_OFFSET PAGE_OFFSET
+#define LOAD_OFFSET KERNEL_START + KERNEL_TR_PAGE_SIZE
#include <asm-generic/vmlinux.lds.h>
OUTPUT_FORMAT("elf64-ia64-little")
@@ -23,22 +24,22 @@
}
v = PAGE_OFFSET; /* this symbol is here to make debugging easier... */
- phys_start = _start - PAGE_OFFSET;
+ phys_start = _start - LOAD_OFFSET;
. = KERNEL_START;
_text = .;
_stext = .;
- .text : AT(ADDR(.text) - PAGE_OFFSET)
+ .text : AT(ADDR(.text) - LOAD_OFFSET)
{
*(.text.ivt)
*(.text)
}
- .text2 : AT(ADDR(.text2) - PAGE_OFFSET)
+ .text2 : AT(ADDR(.text2) - LOAD_OFFSET)
{ *(.text2) }
#ifdef CONFIG_SMP
- .text.lock : AT(ADDR(.text.lock) - PAGE_OFFSET)
+ .text.lock : AT(ADDR(.text.lock) - LOAD_OFFSET)
{ *(.text.lock) }
#endif
_etext = .;
@@ -47,14 +48,14 @@
/* Exception table */
. = ALIGN(16);
- __ex_table : AT(ADDR(__ex_table) - PAGE_OFFSET)
+ __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET)
{
__start___ex_table = .;
*(__ex_table)
__stop___ex_table = .;
}
- __mckinley_e9_bundles : AT(ADDR(__mckinley_e9_bundles) - PAGE_OFFSET)
+ __mckinley_e9_bundles : AT(ADDR(__mckinley_e9_bundles) - LOAD_OFFSET)
{
__start___mckinley_e9_bundles = .;
*(__mckinley_e9_bundles)
@@ -67,7 +68,7 @@
#if defined(CONFIG_IA64_GENERIC)
/* Machine Vector */
. = ALIGN(16);
- .machvec : AT(ADDR(.machvec) - PAGE_OFFSET)
+ .machvec : AT(ADDR(.machvec) - LOAD_OFFSET)
{
machvec_start = .;
*(.machvec)
@@ -77,9 +78,9 @@
/* Unwind info & table: */
. = ALIGN(8);
- .IA_64.unwind_info : AT(ADDR(.IA_64.unwind_info) - PAGE_OFFSET)
+ .IA_64.unwind_info : AT(ADDR(.IA_64.unwind_info) - LOAD_OFFSET)
{ *(.IA_64.unwind_info*) }
- .IA_64.unwind : AT(ADDR(.IA_64.unwind) - PAGE_OFFSET)
+ .IA_64.unwind : AT(ADDR(.IA_64.unwind) - LOAD_OFFSET)
{
ia64_unw_start = .;
*(.IA_64.unwind*)
@@ -88,20 +89,20 @@
RODATA
- .opd : AT(ADDR(.opd) - PAGE_OFFSET)
+ .opd : AT(ADDR(.opd) - LOAD_OFFSET)
{ *(.opd) }
/* Initialization code and data: */
. = ALIGN(PAGE_SIZE);
__init_begin = .;
- .init.text : AT(ADDR(.init.text) - PAGE_OFFSET)
+ .init.text : AT(ADDR(.init.text) - LOAD_OFFSET)
{ *(.init.text) }
- .init.data : AT(ADDR(.init.data) - PAGE_OFFSET)
+ .init.data : AT(ADDR(.init.data) - LOAD_OFFSET)
{ *(.init.data) }
- .init.ramfs : AT(ADDR(.init.ramfs) - PAGE_OFFSET)
+ .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET)
{
__initramfs_start = .;
*(.init.ramfs)
@@ -109,19 +110,19 @@
}
. = ALIGN(16);
- .init.setup : AT(ADDR(.init.setup) - PAGE_OFFSET)
+ .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET)
{
__setup_start = .;
*(.init.setup)
__setup_end = .;
}
- __param : AT(ADDR(__param) - PAGE_OFFSET)
+ __param : AT(ADDR(__param) - LOAD_OFFSET)
{
__start___param = .;
*(__param)
__stop___param = .;
}
- .initcall.init : AT(ADDR(.initcall.init) - PAGE_OFFSET)
+ .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET)
{
__initcall_start = .;
*(.initcall1.init)
@@ -134,17 +135,17 @@
__initcall_end = .;
}
__con_initcall_start = .;
- .con_initcall.init : AT(ADDR(.con_initcall.init) - PAGE_OFFSET)
+ .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET)
{ *(.con_initcall.init) }
__con_initcall_end = .;
. = ALIGN(PAGE_SIZE);
__init_end = .;
/* The initial task and kernel stack */
- .data.init_task : AT(ADDR(.data.init_task) - PAGE_OFFSET)
+ .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET)
{ *(.data.init_task) }
- .data.page_aligned : AT(ADDR(.data.page_aligned) - PAGE_OFFSET)
+ .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET)
{ *(__special_page_section)
__start_gate_section = .;
*(.text.gate)
@@ -152,13 +153,13 @@
}
. = ALIGN(SMP_CACHE_BYTES);
- .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - PAGE_OFFSET)
+ .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET)
{ *(.data.cacheline_aligned) }
/* Per-cpu data: */
. = ALIGN(PERCPU_PAGE_SIZE);
__phys_per_cpu_start = .;
- .data.percpu PERCPU_ADDR : AT(__phys_per_cpu_start - PAGE_OFFSET)
+ .data.percpu PERCPU_ADDR : AT(__phys_per_cpu_start - LOAD_OFFSET)
{
__per_cpu_start = .;
*(.data.percpu)
@@ -166,24 +167,24 @@
}
. = __phys_per_cpu_start + PERCPU_PAGE_SIZE; /* ensure percpu data fits into percpu page size */
- .data : AT(ADDR(.data) - PAGE_OFFSET)
+ .data : AT(ADDR(.data) - LOAD_OFFSET)
{ *(.data) *(.gnu.linkonce.d*) CONSTRUCTORS }
. = ALIGN(16);
__gp = . + 0x200000; /* gp must be 16-byte aligned for exc. table */
- .got : AT(ADDR(.got) - PAGE_OFFSET)
+ .got : AT(ADDR(.got) - LOAD_OFFSET)
{ *(.got.plt) *(.got) }
/* We want the small data sections together, so single-instruction offsets
can access them all, and initialized data all before uninitialized, so
we can shorten the on-disk segment size. */
- .sdata : AT(ADDR(.sdata) - PAGE_OFFSET)
+ .sdata : AT(ADDR(.sdata) - LOAD_OFFSET)
{ *(.sdata) }
_edata = .;
_bss = .;
- .sbss : AT(ADDR(.sbss) - PAGE_OFFSET)
+ .sbss : AT(ADDR(.sbss) - LOAD_OFFSET)
{ *(.sbss) *(.scommon) }
- .bss : AT(ADDR(.bss) - PAGE_OFFSET)
+ .bss : AT(ADDR(.bss) - LOAD_OFFSET)
{ *(.bss) *(COMMON) }
_end = .;
diff -ru l2567-ref/include/asm-ia64/page.h l2567-reloc/include/asm-ia64/page.h
--- l2567-ref/include/asm-ia64/page.h Thu May 1 10:19:29 2003
+++ l2567-reloc/include/asm-ia64/page.h Fri May 9 15:58:45 2003
@@ -118,6 +118,8 @@
*/
#define __pa(x) ({ia64_va _v; _v.l = (long) (x); _v.f.reg = 0; _v.l;})
#define __va(x) ({ia64_va _v; _v.l = (long) (x); _v.f.reg = -1; _v.p;})
+#define __tpa(x) ({ia64_va _v; asm("tpa %0=%1" : "=r"(_v.l) : "r"(x)); _v.l;})
+#define __imva(x) ((long)__va(__tpa(x)))
#define REGION_NUMBER(x) ({ia64_va _v; _v.l = (long) (x); _v.f.reg;})
#define REGION_OFFSET(x) ({ia64_va _v; _v.l = (long) (x); _v.f.off;})
diff -ru l2567-ref/include/asm-ia64/pgtable.h l2567-reloc/include/asm-ia64/pgtable.h
--- l2567-ref/include/asm-ia64/pgtable.h Thu May 1 10:19:29 2003
+++ l2567-reloc/include/asm-ia64/pgtable.h Tue May 13 09:02:56 2003
@@ -205,7 +205,7 @@
#define RGN_MAP_LIMIT ((1UL << (4*PAGE_SHIFT - 12)) - PAGE_SIZE) /* per region addr limit */
#define RGN_KERNEL 7
-#define VMALLOC_START (0xa000000000000000 + 3*PERCPU_PAGE_SIZE)
+#define VMALLOC_START 0xa000000200000000
#define VMALLOC_VMADDR(x) ((unsigned long)(x))
#ifdef CONFIG_VIRTUAL_MEM_MAP
# define VMALLOC_END_INIT (0xa000000000000000 + (1UL << (4*PAGE_SHIFT - 9)))
@@ -448,7 +448,8 @@
* for zero-mapped memory areas etc..
*/
extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
-#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+extern struct page *zero_page_memmap_ptr;
+#define ZERO_PAGE(vaddr) (zero_page_memmap_ptr)
/* We provide our own get_unmapped_area to cope with VA holes for userland */
#define HAVE_ARCH_UNMAPPED_AREA
@@ -485,7 +486,6 @@
*/
#define KERNEL_TR_PAGE_SHIFT _PAGE_SIZE_64M
#define KERNEL_TR_PAGE_SIZE (1 << KERNEL_TR_PAGE_SHIFT)
-#define KERNEL_TR_PAGE_NUM ((KERNEL_START - PAGE_OFFSET) / KERNEL_TR_PAGE_SIZE)
/*
* No page table caches to initialise
diff -ru l2567-ref/include/asm-ia64/system.h l2567-reloc/include/asm-ia64/system.h
--- l2567-ref/include/asm-ia64/system.h Thu May 1 10:19:29 2003
+++ l2567-reloc/include/asm-ia64/system.h Fri May 9 17:56:14 2003
@@ -19,7 +19,7 @@
#include <asm/pal.h>
#include <asm/percpu.h>
-#define KERNEL_START (PAGE_OFFSET + 68*1024*1024)
+#define KERNEL_START (0xA000000100000000)
/* 0xa000000000000000 - 0xa000000000000000+PERCPU_MAX_SIZE remain unmapped */
#define PERCPU_ADDR (0xa000000000000000 + PERCPU_PAGE_SIZE)
^ permalink raw reply [flat|nested] 60+ messages in thread* Re: [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (39 preceding siblings ...)
2003-05-13 22:18 ` Luck, Tony
@ 2003-05-14 1:24 ` Jesse Barnes
2003-05-14 5:29 ` Christian Hildner
` (17 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: Jesse Barnes @ 2003-05-14 1:24 UTC (permalink / raw)
To: linux-ia64
Great! I'm working on a 2.5.69 SN update now, so hopefully I'll have
a chance to test your patch a little soon.
Thanks,
Jesse
On Tue, May 13, 2003 at 03:18:48PM -0700, Luck, Tony wrote:
> Here's a proof of concept patch (against 2.5.67) that covers
> most of the features we've been talking about in this thread.
> This builds and boots both UP and SMP on a Tiger.
>
> Kernel text and data are linked into region 5 at address
> 0xA000000100000000, leaving the bottom 4G of region 5 available
> for miscellaneous stuff as suggested by David. It will be possible
> to extend this patch to split text and data into separate sections
> to allow kernel text replication, but I left that out for now to
> keep this patch to the bare minimum.
>
> This version still uses the __tpa() macro, but converting it to
> use the ia64_tpa() inline function is just a matter of getting
> all the types (argument and return value) so that the compiler
> doesn't spit warnings all over the screen during build.
>
> This patch doesn't fixup fs/proc/kcore.c (my old patch didn't
> apply cleanly ... and much of the changes it would have made are
> no longer needed as kernel is now at a lower address than modules).
^ permalink raw reply [flat|nested] 60+ messages in thread* RE: [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (40 preceding siblings ...)
2003-05-14 1:24 ` Jesse Barnes
@ 2003-05-14 5:29 ` Christian Hildner
2003-05-14 16:44 ` Luck, Tony
` (16 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: Christian Hildner @ 2003-05-14 5:29 UTC (permalink / raw)
To: linux-ia64
tony.luck@intel.com schrieb:
> It will be possible to extend this patch to split text and data into
> separate sections to allow kernel text replication, but I left that
> out for now to keep this patch to the bare minimum.
>
If you plan to separate text and data of the kernel, be aware that tpa only
translates data addresses and not instruction addresses. We would need
a tpa.d and tpa.i. I am wondering about the lack of "tpa.i" in the
Itanium architecture design.
Christian
^ permalink raw reply [flat|nested] 60+ messages in thread* RE: [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (41 preceding siblings ...)
2003-05-14 5:29 ` Christian Hildner
@ 2003-05-14 16:44 ` Luck, Tony
2003-05-15 3:05 ` David Mosberger
` (15 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: Luck, Tony @ 2003-05-14 16:44 UTC (permalink / raw)
To: linux-ia64
> If you plan to separate text and data of the kernel be aware
> of tpa only translating data addresses and not instruction
> addresses. We would need a tpa.d and tpa.i. I am wondering
> about the lack of "tpa.i" in the itanium architecture design.
The previous incarnations of the replicate kernel text patch
all provided both an ITR and DTR mapping for the replicated
area ... but I don't recall that tpa was the issue, I think
that there are various data-ish blobs in amongst the code that
need to be mapped ... there is definitely the .rodata section.
Here's some ASCII-art that shows physical layout to the left, and
virtual layout on the right. Sections in () show up at the virtual
addresses indicated, but aren't referenced by those addresses.
We use ITR[0] and DTR[0] to map what is labelled as the PERNODE
area (each node gets a copy of the bottom sections of the kernel,
so these mappings are different for each node), and DTR[3] to map
the GLOBAL area (same mapping on all nodes).
GLOBAL AREA
+------------+
| .bss |
| |
| .data |
| |
| .init.data |
| |
|(.init.text)|
PHYSICAL | |
+------------+ | (.rodata) |
| .bss | | |
| | | (.text) |
| .data | +------------+ Region5 + 8G
| |
| .init.data |
| |
| .init.text |
| | PERNODE AREA
| .rodata | +------------+
| | | (.bss) |
| .text | | |
+------------+ | (.data) |
| |
|(.init.data)|
| |
| .init.text |
| |
| .rodata |
| |
| .text |
+------------+ Region5 + 4G
-Tony
^ permalink raw reply [flat|nested] 60+ messages in thread* RE: [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (42 preceding siblings ...)
2003-05-14 16:44 ` Luck, Tony
@ 2003-05-15 3:05 ` David Mosberger
2003-05-15 16:33 ` Luck, Tony
` (14 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: David Mosberger @ 2003-05-15 3:05 UTC (permalink / raw)
To: linux-ia64
The patch looks mostly fine to me. I'm not too fond of the __tpa()
and __imva() macros, but that may be mainly a matter of preference.
What I definitely don't like is that the casting seems rather confused
and that the patch is adding __tpa() when we already have ia64_tpa().
As an example of the casting issues: __imva() returns a long, but at
times it's cast to "unsigned long" which doesn't make a lot of sense
(for assignments, anyhow). Moreover, we should stick to the Linux
principle that kernel-space pointers have a type of "void *".
--david
^ permalink raw reply [flat|nested] 60+ messages in thread* RE: [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (43 preceding siblings ...)
2003-05-15 3:05 ` David Mosberger
@ 2003-05-15 16:33 ` Luck, Tony
2003-05-15 18:03 ` Jack Steiner
` (13 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: Luck, Tony @ 2003-05-15 16:33 UTC (permalink / raw)
To: linux-ia64
> The patch looks mostly fine to me. I'm not too fond of the __tpa()
> and __imva() macros, but that may be mainly a matter of preference.
> What I definitely don't like is that the casting seems rather confused
> and that the patch is adding __tpa() when we already have ia64_tpa().
> As an example of the casting issues: __imva() returns a long, but at
> times it's cast to "unsigned long" which doesn't make a lot of sense
> (for assignments, anyhow). Moreover, we should stick to the Linux
> principle that kernel-space pointers have a type of "void *".
I can get rid of the __tpa() easily by using ia64_tpa() ... I'll
fix the type-casting fiasco while making those changes.
Do you have some direction for the __imva() macro? The acronym
stands for "Identity Mapped Virtual Address", and its purpose is
to provide the region 7 address for an object, so that the existing
code can continue to work without a whole lot of run-on changes.
If you just don't like the name, then it's easy to change to something
else. If it's the typecast issue, then I can switch over to void *.
If it just makes your head hurt coping with schizophrenic dual
mapping that the kernel gets with this patch, then you'll just have
to take a couple of aspirin and look at the patch again in the
morning :-)
-Tony
^ permalink raw reply [flat|nested] 60+ messages in thread* Re: [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (44 preceding siblings ...)
2003-05-15 16:33 ` Luck, Tony
@ 2003-05-15 18:03 ` Jack Steiner
2003-05-15 18:59 ` David Mosberger
` (12 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: Jack Steiner @ 2003-05-15 18:03 UTC (permalink / raw)
To: linux-ia64
>
> > The patch looks mostly fine to me. I'm not too fond of the __tpa()
> > and __imva() macros, but that may be mainly a matter of preference.
> > What I definitely don't like is that the casting seems rather confused
> > and that the patch is adding __tpa() when we already have ia64_tpa().
> > As an example of the casting issues: __imva() returns a long, but at
> > times it's cast to "unsigned long" which doesn't make a lot of sense
> > (for assignments, anyhow). Moreover, we should stick to the Linux
> > principle that kernel-space pointers have a type of "void *".
>
> I can get rid of the __tpa() easily by using ia64_tpa() ... I'll
> fix the type-casting fiasco while making those changes.
>
> Do you have some direction for the __imva() macro? The acronym
> stands for "Identity Mapped Virtual Address", and its purpose is
> to provide the region 7 address for an object, so that the existing
> code can continue to work without a whole lot of run-on changes.
> If you just don't like the name, then it's easy to change to something
> else. If it's the typecast issue, then I can switch over to void *.
> If it just makes your head hurt coping with schizophrenic dual
> mapping that the kernel gets with this patch, then you'll just have
> to take a couple of aspirin and look at the patch again in the
> morning :-)
You can always replace __imva(x) with __va(ia64_tpa(x)). This looks
very strange but is equivalent. The disadvantage is that it obscures
the conversion that is taking place.
Seems to me that a special macro (any name is fine) makes it easier to
understand the reason for the conversion.
>
> -Tony
>
>
--
Thanks
Jack Steiner (651-683-5302) (vnet 233-5302) steiner@sgi.com
^ permalink raw reply [flat|nested] 60+ messages in thread* RE: [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (45 preceding siblings ...)
2003-05-15 18:03 ` Jack Steiner
@ 2003-05-15 18:59 ` David Mosberger
2003-05-15 21:43 ` Luck, Tony
` (11 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: David Mosberger @ 2003-05-15 18:59 UTC (permalink / raw)
To: linux-ia64
>>>>> On Thu, 15 May 2003 09:33:12 -0700, "Luck, Tony" <tony.luck@intel.com> said:
Tony> Do you have some direction for the __imva() macro? The
Tony> acronym stands for "Identity Mapped Virtual Address", and its
Tony> purpose is to provide the region 7 address for an object, so
Tony> that the existing code can continue to work without a whole
Tony> lot of run-on changes. If you just don't like the name, then
Tony> it's easy to change to something else. If it's the typecast
Tony> issue, then I can switch over to void *. If it just makes
Tony> your head hurt coping with schizophrenic dual mapping that the
Tony> kernel gets with this patch, then you'll just have to take a
Tony> couple of aspirin and look at the patch again in the morning
Tony> :-)
It's a bit of "all of the above":
- The name isn't all that great, but since it _is_ doing something
rather strange, a strange name seems quite appropriate. The patch
should definitely add a brief (one-liner?) explanation of what
__imva() stands for and what it does. Also, I really dislike the
underscore silliness; let's just use ia64_imva() or some such, so
it's obvious that it is ia64-specific (there is no
namespace-pollution issue as there would be, say, in a user-level
library, so there is really no reason for using an underscore
prefix).
- the casting should be fixed
- instead of aspirins, it might be good to add the ASCII art you
posted recently in an appropriate place (either a header-file
or perhaps a Documentation/ia64 file); of course, the picture
you drew included text-replication, so we may want to hold off
with this until that part of the patch is in, too
--david
^ permalink raw reply [flat|nested] 60+ messages in thread* RE: [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (46 preceding siblings ...)
2003-05-15 18:59 ` David Mosberger
@ 2003-05-15 21:43 ` Luck, Tony
2003-05-16 22:33 ` Luck, Tony
` (10 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: Luck, Tony @ 2003-05-15 21:43 UTC (permalink / raw)
To: linux-ia64
[-- Attachment #1: Type: text/plain, Size: 2112 bytes --]
> It's a bit of "all of the above":
>
> - The name isn't all that great, but since it _is_ doing something
> rather strange, a strange name seems quite appropriate. The patch
> should definitely add a brief (one-liner?) explanation of what
> __imva() stands for and what it does. Also, I really dislike the
> underscore silliness; let's just use ia64_imva() or some such, so
> it's obvious that it is ia64-specific (there is no
> namespace-pollution issue as there would be, say, in a user-level
> library, so there is really no reason for using an underscore
> prefix).
>
> - the casting should be fixed
>
> - instead of aspirins, it might be good to add the ASCII art you
> posted recently in an appropriate place (either a header-file
> or perhaps a Documentation/ia64 file); of course, the picture
> you drew included text-replication, so we may want to hold off
> with this until that part of the patch is in, too
Ok. Here's a new patch (against a snapshot pulled from
http://lia64.bkbits.net:8080/linux-ia64-2.5 at about 9:30
this morning). Builds and boots SMP on Tiger.
The __tpa() and __imva() macros are gone (along with their accursed
double underscore prefixes).
Types are cleaned up somewhat, there may be a couple of questionable
casts, but this looks to be as close to clean as I can make it.
The pre-existing ia64_tpa() function takes a __u64 argument, and returns
a __u64 value ... which matches nicely with all the uses in mca.c, setup.c
and smpboot.c which all use and return "unsigned long" (which is close
enough to __u64 that the compiler doesn't complain).
I've added a new function ia64_imva() which takes a "void *" argument and
returns a "void *" ... which matches with most of the uses, there are some
places that want an "unsigned long" return, so I still have some typecasts.
There's a two-line comment explaining what it does.
No ascii art in this patch, it isn't quite at head-ache complexity
yet. But I'll definitely put some pictures in when we get to replication
patches.
-Tony
[-- Attachment #2: reloc2569.diff --]
[-- Type: application/octet-stream, Size: 27110 bytes --]
diff -ru l2569-mosberger/arch/ia64/kernel/efi_stub.S l2569-aegl/arch/ia64/kernel/efi_stub.S
--- l2569-mosberger/arch/ia64/kernel/efi_stub.S Thu May 15 10:12:08 2003
+++ l2569-aegl/arch/ia64/kernel/efi_stub.S Thu May 15 11:45:02 2003
@@ -62,7 +62,7 @@
mov b6=r2
;;
andcm r16=loc3,r16 // get psr with IT, DT, and RT bits cleared
- br.call.sptk.many rp=ia64_switch_mode
+ br.call.sptk.many rp=ia64_switch_mode_phys
.ret0: mov out4=in5
mov out0=in1
mov out1=in2
@@ -73,7 +73,7 @@
br.call.sptk.many rp=b6 // call the EFI function
.ret1: mov ar.rsc=0 // put RSE in enforced lazy, LE mode
mov r16=loc3
- br.call.sptk.many rp=ia64_switch_mode // return to virtual mode
+ br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
.ret2: mov ar.rsc=loc4 // restore RSE configuration
mov ar.pfs=loc1
mov rp=loc0
diff -ru l2569-mosberger/arch/ia64/kernel/entry.S l2569-aegl/arch/ia64/kernel/entry.S
--- l2569-mosberger/arch/ia64/kernel/entry.S Thu May 15 10:12:08 2003
+++ l2569-aegl/arch/ia64/kernel/entry.S Thu May 15 11:45:02 2003
@@ -178,15 +178,12 @@
;;
st8 [r22]=sp // save kernel stack pointer of old task
shr.u r26=r20,IA64_GRANULE_SHIFT
- shr.u r17=r20,KERNEL_TR_PAGE_SHIFT
- ;;
- cmp.ne p6,p7=KERNEL_TR_PAGE_NUM,r17
adds r21=IA64_TASK_THREAD_KSP_OFFSET,in0
;;
/*
* If we've already mapped this task's page, we can skip doing it again.
*/
-(p6) cmp.eq p7,p6=r26,r27
+ cmp.eq p7,p6=r26,r27
(p6) br.cond.dpnt .map
;;
.done:
diff -ru l2569-mosberger/arch/ia64/kernel/head.S l2569-aegl/arch/ia64/kernel/head.S
--- l2569-mosberger/arch/ia64/kernel/head.S Thu May 15 10:12:09 2003
+++ l2569-aegl/arch/ia64/kernel/head.S Thu May 15 11:45:02 2003
@@ -60,22 +60,42 @@
mov r4=r0
.body
- /*
- * Initialize the region register for region 7 and install a translation register
- * that maps the kernel's text and data:
- */
rsm psr.i | psr.ic
- mov r16=((ia64_rid(IA64_REGION_ID_KERNEL, PAGE_OFFSET) << 8) | (IA64_GRANULE_SHIFT << 2))
;;
srlz.i
+ ;;
+ /*
+ * Initialize kernel region registers:
+ * rr[5]: VHPT enabled, page size = PAGE_SHIFT
+ * rr[6]: VHPT disabled, page size = IA64_GRANULE_SHIFT
+ * rr[7]: VHPT disabled, page size = IA64_GRANULE_SHIFT
+ */
+ mov r16=((ia64_rid(IA64_REGION_ID_KERNEL, (5<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
+ movl r17=(5<<61)
+ mov r18=((ia64_rid(IA64_REGION_ID_KERNEL, (6<<61)) << 8) | (IA64_GRANULE_SHIFT << 2))
+ movl r19=(6<<61)
+ mov r20=((ia64_rid(IA64_REGION_ID_KERNEL, (7<<61)) << 8) | (IA64_GRANULE_SHIFT << 2))
+ movl r21=(7<<61)
+ ;;
+ mov rr[r17]=r16
+ mov rr[r19]=r18
+ mov rr[r21]=r20
+ ;;
+ /*
+ * Now pin mappings into the TLB for kernel text and data
+ */
mov r18=KERNEL_TR_PAGE_SHIFT<<2
movl r17=KERNEL_START
;;
- mov rr[r17]=r16
mov cr.itir=r18
mov cr.ifa=r17
mov r16=IA64_TR_KERNEL
- movl r18=((1 << KERNEL_TR_PAGE_SHIFT) | PAGE_KERNEL)
+ mov r3=ip
+ movl r18=PAGE_KERNEL
+ ;;
+ dep r2=0,r3,0,KERNEL_TR_PAGE_SHIFT
+ ;;
+ or r18=r2,r18
;;
srlz.i
;;
@@ -113,16 +133,6 @@
mov ar.fpsr=r2
;;
-#ifdef CONFIG_IA64_EARLY_PRINTK
- mov r3=(6<<8) | (IA64_GRANULE_SHIFT<<2)
- movl r2=6<<61
- ;;
- mov rr[r2]=r3
- ;;
- srlz.i
- ;;
-#endif
-
#define isAP p2 // are we an Application Processor?
#define isBP p3 // are we the Bootstrap Processor?
@@ -143,12 +153,36 @@
movl r2=init_thread_union
cmp.eq isBP,isAP=r0,r0
#endif
- mov r16=KERNEL_TR_PAGE_NUM
;;
+ tpa r3=r2 // r3 == phys addr of task struct
+ // load mapping for stack (virtaddr in r2, physaddr in r3)
+ rsm psr.ic
+ movl r17=PAGE_KERNEL
+ ;;
+ srlz.d
+ dep r18=0,r3,0,12
+ ;;
+ or r18=r17,r18
+ dep r2=-1,r3,61,3 // IMVA of task
+ ;;
+ mov r17=rr[r2]
+ shr.u r16=r3,IA64_GRANULE_SHIFT
+ ;;
+ dep r17=0,r17,8,24
+ ;;
+ mov cr.itir=r17
+ mov cr.ifa=r2
+
+ mov r19=IA64_TR_CURRENT_STACK
+ ;;
+ itr.d dtr[r19]=r18
+ ;;
+ ssm psr.ic
+ srlz.d
+ ;;
// load the "current" pointer (r13) and ar.k6 with the current task
mov IA64_KR(CURRENT)=r2 // virtual address
- // initialize k4 to a safe value (64-128MB is mapped by TR_KERNEL)
mov IA64_KR(CURRENT_STACK)=r16
mov r13=r2
/*
@@ -665,14 +699,14 @@
END(__ia64_init_fpu)
/*
- * Switch execution mode from virtual to physical or vice versa.
+ * Switch execution mode from virtual to physical
*
* Inputs:
* r16 = new psr to establish
*
* Note: RSE must already be in enforced lazy mode
*/
-GLOBAL_ENTRY(ia64_switch_mode)
+GLOBAL_ENTRY(ia64_switch_mode_phys)
{
alloc r2=ar.pfs,0,0,0,0
rsm psr.i | psr.ic // disable interrupts and interrupt collection
@@ -682,35 +716,86 @@
{
flushrs // must be first insn in group
srlz.i
- shr.u r19=r15,61 // r19 <- top 3 bits of current IP
}
;;
mov cr.ipsr=r16 // set new PSR
- add r3=1f-ia64_switch_mode,r15
- xor r15=0x7,r19 // flip the region bits
+ add r3=1f-ia64_switch_mode_phys,r15
mov r17=ar.bsp
mov r14=rp // get return address into a general register
+ ;;
- // switch RSE backing store:
+ // going to physical mode, use tpa to translate virt->phys
+ tpa r17=r17
+ tpa r3=r3
+ tpa sp=sp
+ tpa r14=r14
;;
- dep r17=r15,r17,61,3 // make ar.bsp physical or virtual
+
mov r18=ar.rnat // save ar.rnat
- ;;
mov ar.bspstore=r17 // this steps on ar.rnat
- dep r3=r15,r3,61,3 // make rfi return address physical or virtual
+ mov cr.iip=r3
+ mov cr.ifs=r0
;;
+ mov ar.rnat=r18 // restore ar.rnat
+ rfi // must be last insn in group
+ ;;
+1: mov rp=r14
+ br.ret.sptk.many rp
+END(ia64_switch_mode_phys)
+
+/*
+ * Switch execution mode from physical to virtual
+ *
+ * Inputs:
+ * r16 = new psr to establish
+ *
+ * Note: RSE must already be in enforced lazy mode
+ */
+GLOBAL_ENTRY(ia64_switch_mode_virt)
+ {
+ alloc r2=ar.pfs,0,0,0,0
+ rsm psr.i | psr.ic // disable interrupts and interrupt collection
+ mov r15=ip
+ }
+ ;;
+ {
+ flushrs // must be first insn in group
+ srlz.i
+ }
+ ;;
+ mov cr.ipsr=r16 // set new PSR
+ add r3=1f-ia64_switch_mode_virt,r15
+
+ mov r17=ar.bsp
+ mov r14=rp // get return address into a general register
+ ;;
+
+ // going to virtual
+ // - for code addresses, set upper bits of addr to KERNEL_START
+ // - for stack addresses, set upper 3 bits to 0xe.... Dont change any of the
+ // lower bits since we want it to stay identity mapped
+ movl r18=KERNEL_START
+ dep r3=0,r3,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT
+ dep r14=0,r14,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT
+ dep r17=-1,r17,61,3
+ dep sp=-1,sp,61,3
+ ;;
+ or r3=r3,r18
+ or r14=r14,r18
+ ;;
+
+ mov r18=ar.rnat // save ar.rnat
+ mov ar.bspstore=r17 // this steps on ar.rnat
mov cr.iip=r3
mov cr.ifs=r0
- dep sp=r15,sp,61,3 // make stack pointer physical or virtual
;;
mov ar.rnat=r18 // restore ar.rnat
- dep r14=r15,r14,61,3 // make function return address physical or virtual
rfi // must be last insn in group
;;
1: mov rp=r14
br.ret.sptk.many rp
-END(ia64_switch_mode)
+END(ia64_switch_mode_virt)
#ifdef CONFIG_IA64_BRL_EMU
diff -ru l2569-mosberger/arch/ia64/kernel/ia64_ksyms.c l2569-aegl/arch/ia64/kernel/ia64_ksyms.c
--- l2569-mosberger/arch/ia64/kernel/ia64_ksyms.c Thu May 15 10:12:09 2003
+++ l2569-aegl/arch/ia64/kernel/ia64_ksyms.c Thu May 15 11:45:02 2003
@@ -159,6 +159,7 @@
EXPORT_SYMBOL(ia64_mv);
#endif
EXPORT_SYMBOL(machvec_noop);
+EXPORT_SYMBOL(zero_page_memmap_ptr);
#ifdef CONFIG_PERFMON
#include <asm/perfmon.h>
EXPORT_SYMBOL(pfm_install_alternate_syswide_subsystem);
diff -ru l2569-mosberger/arch/ia64/kernel/ivt.S l2569-aegl/arch/ia64/kernel/ivt.S
--- l2569-mosberger/arch/ia64/kernel/ivt.S Thu May 15 10:12:09 2003
+++ l2569-aegl/arch/ia64/kernel/ivt.S Thu May 15 11:45:02 2003
@@ -122,8 +122,13 @@
shr.u r18=r22,PGDIR_SHIFT // get bits 33-63 of the faulting address
;;
(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place
+ .global ia64_ivt_patch1
+ia64_ivt_patch1:
+{ .mlx // we patch this bundle to include physical address of swapper_pg_dir
srlz.d // ensure "rsm psr.dt" has taken effect
-(p6) movl r19=__pa(swapper_pg_dir) // region 5 is rooted at swapper_pg_dir
+(p6) movl r19=swapper_pg_dir // region 5 is rooted at swapper_pg_dir
+}
+ .pred.rel "mutex", p6, p7
(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT
(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3
;;
@@ -415,8 +420,13 @@
shr.u r18=r16,PGDIR_SHIFT // get bits 33-63 of faulting address
;;
(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place
- srlz.d
-(p6) movl r19=__pa(swapper_pg_dir) // region 5 is rooted at swapper_pg_dir
+ .global ia64_ivt_patch2
+ia64_ivt_patch2:
+{ .mlx // we patch this bundle to include physical address of swapper_pg_dir
+ srlz.d // ensure "rsm psr.dt" has taken effect
+(p6) movl r19=swapper_pg_dir // region 5 is rooted at swapper_pg_dir
+}
+ .pred.rel "mutex", p6, p7
(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT
(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3
;;
diff -ru l2569-mosberger/arch/ia64/kernel/mca.c l2569-aegl/arch/ia64/kernel/mca.c
--- l2569-mosberger/arch/ia64/kernel/mca.c Thu May 15 10:12:09 2003
+++ l2569-aegl/arch/ia64/kernel/mca.c Thu May 15 13:19:31 2003
@@ -662,17 +662,17 @@
IA64_MCA_DEBUG("ia64_mca_init: registered mca rendezvous spinloop and wakeup mech.\n");
- ia64_mc_info.imi_mca_handler = __pa(mca_hldlr_ptr->fp);
+ ia64_mc_info.imi_mca_handler = ia64_tpa(mca_hldlr_ptr->fp);
/*
* XXX - disable SAL checksum by setting size to 0; should be
- * __pa(ia64_os_mca_dispatch_end) - __pa(ia64_os_mca_dispatch);
+ * ia64_tpa(ia64_os_mca_dispatch_end) - ia64_tpa(ia64_os_mca_dispatch);
*/
ia64_mc_info.imi_mca_handler_size = 0;
/* Register the os mca handler with SAL */
if ((rc = ia64_sal_set_vectors(SAL_VECTOR_OS_MCA,
ia64_mc_info.imi_mca_handler,
- mca_hldlr_ptr->gp,
+ ia64_tpa(mca_hldlr_ptr->gp),
ia64_mc_info.imi_mca_handler_size,
0, 0, 0)))
{
@@ -682,15 +682,15 @@
}
IA64_MCA_DEBUG("ia64_mca_init: registered os mca handler with SAL at 0x%lx, gp = 0x%lx\n",
- ia64_mc_info.imi_mca_handler, mca_hldlr_ptr->gp);
+ ia64_mc_info.imi_mca_handler, ia64_tpa(mca_hldlr_ptr->gp));
/*
* XXX - disable SAL checksum by setting size to 0, should be
* IA64_INIT_HANDLER_SIZE
*/
- ia64_mc_info.imi_monarch_init_handler = __pa(mon_init_ptr->fp);
+ ia64_mc_info.imi_monarch_init_handler = ia64_tpa(mon_init_ptr->fp);
ia64_mc_info.imi_monarch_init_handler_size = 0;
- ia64_mc_info.imi_slave_init_handler = __pa(slave_init_ptr->fp);
+ ia64_mc_info.imi_slave_init_handler = ia64_tpa(slave_init_ptr->fp);
ia64_mc_info.imi_slave_init_handler_size = 0;
IA64_MCA_DEBUG("ia64_mca_init: os init handler at %lx\n",
@@ -699,10 +699,10 @@
/* Register the os init handler with SAL */
if ((rc = ia64_sal_set_vectors(SAL_VECTOR_OS_INIT,
ia64_mc_info.imi_monarch_init_handler,
- __pa(ia64_get_gp()),
+ ia64_tpa(ia64_get_gp()),
ia64_mc_info.imi_monarch_init_handler_size,
ia64_mc_info.imi_slave_init_handler,
- __pa(ia64_get_gp()),
+ ia64_tpa(ia64_get_gp()),
ia64_mc_info.imi_slave_init_handler_size)))
{
printk(KERN_ERR "ia64_mca_init: Failed to register m/s init handlers with SAL. "
diff -ru l2569-mosberger/arch/ia64/kernel/pal.S l2569-aegl/arch/ia64/kernel/pal.S
--- l2569-mosberger/arch/ia64/kernel/pal.S Thu May 15 10:12:09 2003
+++ l2569-aegl/arch/ia64/kernel/pal.S Thu May 15 11:45:02 2003
@@ -164,7 +164,7 @@
;;
mov loc4=ar.rsc // save RSE configuration
dep.z loc2=loc2,0,61 // convert pal entry point to physical
- dep.z r8=r8,0,61 // convert rp to physical
+ tpa r8=r8 // convert rp to physical
;;
mov b7 = loc2 // install target to branch reg
mov ar.rsc=0 // put RSE in enforced lazy, LE mode
@@ -174,13 +174,13 @@
or loc3=loc3,r17 // add in psr the bits to set
;;
andcm r16=loc3,r16 // removes bits to clear from psr
- br.call.sptk.many rp=ia64_switch_mode
+ br.call.sptk.many rp=ia64_switch_mode_phys
.ret1: mov rp = r8 // install return address (physical)
br.cond.sptk.many b7
1:
mov ar.rsc=0 // put RSE in enforced lazy, LE mode
mov r16=loc3 // r16= original psr
- br.call.sptk.many rp=ia64_switch_mode // return to virtual mode
+ br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
.ret2:
mov psr.l = loc3 // restore init PSR
@@ -228,13 +228,13 @@
mov b7 = loc2 // install target to branch reg
;;
andcm r16=loc3,r16 // removes bits to clear from psr
- br.call.sptk.many rp=ia64_switch_mode
+ br.call.sptk.many rp=ia64_switch_mode_phys
.ret6:
br.call.sptk.many rp=b7 // now make the call
.ret7:
mov ar.rsc=0 // put RSE in enforced lazy, LE mode
mov r16=loc3 // r16= original psr
- br.call.sptk.many rp=ia64_switch_mode // return to virtual mode
+ br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
.ret8: mov psr.l = loc3 // restore init PSR
mov ar.pfs = loc1
diff -ru l2569-mosberger/arch/ia64/kernel/setup.c l2569-aegl/arch/ia64/kernel/setup.c
--- l2569-mosberger/arch/ia64/kernel/setup.c Thu May 15 10:12:09 2003
+++ l2569-aegl/arch/ia64/kernel/setup.c Thu May 15 13:47:44 2003
@@ -265,7 +265,7 @@
static void
find_memory (void)
{
-# define KERNEL_END ((unsigned long) &_end)
+# define KERNEL_END (&_end)
unsigned long bootmap_size;
unsigned long max_pfn;
int n = 0;
@@ -286,8 +286,8 @@
+ strlen(__va(ia64_boot_param->command_line)) + 1);
n++;
- rsvd_region[n].start = KERNEL_START;
- rsvd_region[n].end = KERNEL_END;
+ rsvd_region[n].start = (unsigned long) ia64_imva((void *)KERNEL_START);
+ rsvd_region[n].end = (unsigned long) ia64_imva(KERNEL_END);
n++;
#ifdef CONFIG_BLK_DEV_INITRD
@@ -347,6 +347,47 @@
#endif
}
+/*
+ * There are two places in the performance critical path of
+ * the exception handling code where we need to know the physical
+ * address of the swapper_pg_dir structure. This routine
+ * patches the "movl" instructions to load the value needed.
+ */
+static void __init
+patch_ivt_with_phys_swapper_pg_dir(void)
+{
+ extern char ia64_ivt_patch1[], ia64_ivt_patch2[];
+ unsigned long spd = ia64_tpa((__u64)swapper_pg_dir);
+ unsigned long *p;
+
+ p = (unsigned long *)ia64_imva(ia64_ivt_patch1);
+
+ *p = (*p & 0x3fffffffffffUL) |
+ ((spd & 0x000000ffffc00000UL)<<24);
+ p++;
+ *p = (*p & 0xf000080fff800000UL) |
+ ((spd & 0x8000000000000000UL) >> 4) |
+ ((spd & 0x7fffff0000000000UL) >> 40) |
+ ((spd & 0x00000000001f0000UL) << 29) |
+ ((spd & 0x0000000000200000UL) << 23) |
+ ((spd & 0x000000000000ff80UL) << 43) |
+ ((spd & 0x000000000000007fUL) << 36);
+
+ p = (unsigned long *)ia64_imva(ia64_ivt_patch2);
+
+ *p = (*p & 0x3fffffffffffUL) |
+ ((spd & 0x000000ffffc00000UL)<<24);
+ p++;
+ *p = (*p & 0xf000080fff800000UL) |
+ ((spd & 0x8000000000000000UL) >> 4) |
+ ((spd & 0x7fffff0000000000UL) >> 40) |
+ ((spd & 0x00000000001f0000UL) << 29) |
+ ((spd & 0x0000000000200000UL) << 23) |
+ ((spd & 0x000000000000ff80UL) << 43) |
+ ((spd & 0x000000000000007fUL) << 36);
+}
+#define PATCH_IVT() patch_ivt_with_phys_swapper_pg_dir()
+
void __init
setup_arch (char **cmdline_p)
{
@@ -355,6 +396,8 @@
unw_init();
+ PATCH_IVT();
+
*cmdline_p = __va(ia64_boot_param->command_line);
strncpy(saved_command_line, *cmdline_p, sizeof(saved_command_line));
saved_command_line[COMMAND_LINE_SIZE-1] = '\0'; /* for safety */
@@ -755,7 +798,7 @@
if (current->mm)
BUG();
- ia64_mmu_init(cpu_data);
+ ia64_mmu_init(ia64_imva(cpu_data));
#ifdef CONFIG_IA32_SUPPORT
/* initialize global ia32 state - CR0 and CR4 */
diff -ru l2569-mosberger/arch/ia64/kernel/smpboot.c l2569-aegl/arch/ia64/kernel/smpboot.c
--- l2569-mosberger/arch/ia64/kernel/smpboot.c Thu May 15 10:12:09 2003
+++ l2569-aegl/arch/ia64/kernel/smpboot.c Thu May 15 13:22:14 2003
@@ -598,7 +598,7 @@
/* Tell SAL where to drop the AP's. */
ap_startup = (struct fptr *) start_ap;
sal_ret = ia64_sal_set_vectors(SAL_VECTOR_OS_BOOT_RENDEZ,
- __pa(ap_startup->fp), __pa(ap_startup->gp), 0, 0, 0, 0);
+ ia64_tpa(ap_startup->fp), ia64_tpa(ap_startup->gp), 0, 0, 0, 0);
if (sal_ret < 0)
printk(KERN_ERR "SMP: Can't set SAL AP Boot Rendezvous: %s\n",
ia64_sal_strerror(sal_ret));
diff -ru l2569-mosberger/arch/ia64/mm/init.c l2569-aegl/arch/ia64/mm/init.c
--- l2569-mosberger/arch/ia64/mm/init.c Thu May 15 10:12:09 2003
+++ l2569-aegl/arch/ia64/mm/init.c Thu May 15 14:09:55 2003
@@ -47,6 +47,8 @@
static int pgt_cache_water[2] = { 25, 50 };
+struct page *zero_page_memmap_ptr; /* map entry for zero page */
+
void
check_pgt_cache (void)
{
@@ -112,14 +114,16 @@
void
free_initmem (void)
{
- unsigned long addr;
+ unsigned long addr, eaddr;
- addr = (unsigned long) &__init_begin;
- for (; addr < (unsigned long) &__init_end; addr += PAGE_SIZE) {
+ addr = (unsigned long) ia64_imva(&__init_begin);
+ eaddr = (unsigned long) ia64_imva(&__init_end);
+ while (addr < eaddr) {
ClearPageReserved(virt_to_page(addr));
set_page_count(virt_to_page(addr), 1);
free_page(addr);
++totalram_pages;
+ addr += PAGE_SIZE;
}
printk(KERN_INFO "Freeing unused kernel memory: %ldkB freed\n",
(&__init_end - &__init_begin) >> 10);
@@ -269,7 +273,7 @@
void __init
ia64_mmu_init (void *my_cpu_data)
{
- unsigned long psr, rid, pta, impl_va_bits;
+ unsigned long psr, pta, impl_va_bits;
extern void __init tlb_init (void);
#ifdef CONFIG_DISABLE_VHPT
# define VHPT_ENABLE_BIT 0
@@ -277,21 +281,8 @@
# define VHPT_ENABLE_BIT 1
#endif
- /*
- * Set up the kernel identity mapping for regions 6 and 5. The mapping for region
- * 7 is setup up in _start().
- */
+ /* Pin mapping for percpu area into TLB */
psr = ia64_clear_ic();
-
- rid = ia64_rid(IA64_REGION_ID_KERNEL, __IA64_UNCACHED_OFFSET);
- ia64_set_rr(__IA64_UNCACHED_OFFSET, (rid << 8) | (IA64_GRANULE_SHIFT << 2));
-
- rid = ia64_rid(IA64_REGION_ID_KERNEL, VMALLOC_START);
- ia64_set_rr(VMALLOC_START, (rid << 8) | (PAGE_SHIFT << 2) | 1);
-
- /* ensure rr6 is up-to-date before inserting the PERCPU_ADDR translation: */
- ia64_srlz_d();
-
ia64_itr(0x2, IA64_TR_PERCPU_DATA, PERCPU_ADDR,
pte_val(pfn_pte(__pa(my_cpu_data) >> PAGE_SHIFT, PAGE_KERNEL)),
PERCPU_PAGE_SHIFT);
@@ -489,6 +480,7 @@
discontig_paging_init();
efi_memmap_walk(count_pages, &num_physpages);
+ zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
}
#else /* !CONFIG_DISCONTIGMEM */
void
@@ -560,6 +552,7 @@
}
free_area_init(zones_size);
# endif /* !CONFIG_VIRTUAL_MEM_MAP */
+ zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
}
#endif /* !CONFIG_DISCONTIGMEM */
@@ -630,7 +623,7 @@
pgt_cache_water[1] = num_pgt_pages;
/* install the gate page in the global page table: */
- put_gate_page(virt_to_page(__start_gate_section), GATE_ADDR);
+ put_gate_page(virt_to_page(ia64_imva(__start_gate_section)), GATE_ADDR);
#ifdef CONFIG_IA32_SUPPORT
ia32_gdt_init();
diff -ru l2569-mosberger/arch/ia64/vmlinux.lds.S l2569-aegl/arch/ia64/vmlinux.lds.S
--- l2569-mosberger/arch/ia64/vmlinux.lds.S Thu May 15 10:12:08 2003
+++ l2569-aegl/arch/ia64/vmlinux.lds.S Thu May 15 11:46:25 2003
@@ -3,8 +3,9 @@
#include <asm/cache.h>
#include <asm/ptrace.h>
#include <asm/system.h>
+#include <asm/pgtable.h>
-#define LOAD_OFFSET PAGE_OFFSET
+#define LOAD_OFFSET KERNEL_START + KERNEL_TR_PAGE_SIZE
#include <asm-generic/vmlinux.lds.h>
OUTPUT_FORMAT("elf64-ia64-little")
@@ -23,22 +24,22 @@
}
v = PAGE_OFFSET; /* this symbol is here to make debugging easier... */
- phys_start = _start - PAGE_OFFSET;
+ phys_start = _start - LOAD_OFFSET;
. = KERNEL_START;
_text = .;
_stext = .;
- .text : AT(ADDR(.text) - PAGE_OFFSET)
+ .text : AT(ADDR(.text) - LOAD_OFFSET)
{
*(.text.ivt)
*(.text)
}
- .text2 : AT(ADDR(.text2) - PAGE_OFFSET)
+ .text2 : AT(ADDR(.text2) - LOAD_OFFSET)
{ *(.text2) }
#ifdef CONFIG_SMP
- .text.lock : AT(ADDR(.text.lock) - PAGE_OFFSET)
+ .text.lock : AT(ADDR(.text.lock) - LOAD_OFFSET)
{ *(.text.lock) }
#endif
_etext = .;
@@ -47,14 +48,14 @@
/* Exception table */
. = ALIGN(16);
- __ex_table : AT(ADDR(__ex_table) - PAGE_OFFSET)
+ __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET)
{
__start___ex_table = .;
*(__ex_table)
__stop___ex_table = .;
}
- __mckinley_e9_bundles : AT(ADDR(__mckinley_e9_bundles) - PAGE_OFFSET)
+ __mckinley_e9_bundles : AT(ADDR(__mckinley_e9_bundles) - LOAD_OFFSET)
{
__start___mckinley_e9_bundles = .;
*(__mckinley_e9_bundles)
@@ -67,7 +68,7 @@
#if defined(CONFIG_IA64_GENERIC)
/* Machine Vector */
. = ALIGN(16);
- .machvec : AT(ADDR(.machvec) - PAGE_OFFSET)
+ .machvec : AT(ADDR(.machvec) - LOAD_OFFSET)
{
machvec_start = .;
*(.machvec)
@@ -77,9 +78,9 @@
/* Unwind info & table: */
. = ALIGN(8);
- .IA_64.unwind_info : AT(ADDR(.IA_64.unwind_info) - PAGE_OFFSET)
+ .IA_64.unwind_info : AT(ADDR(.IA_64.unwind_info) - LOAD_OFFSET)
{ *(.IA_64.unwind_info*) }
- .IA_64.unwind : AT(ADDR(.IA_64.unwind) - PAGE_OFFSET)
+ .IA_64.unwind : AT(ADDR(.IA_64.unwind) - LOAD_OFFSET)
{
ia64_unw_start = .;
*(.IA_64.unwind*)
@@ -88,24 +89,24 @@
RODATA
- .opd : AT(ADDR(.opd) - PAGE_OFFSET)
+ .opd : AT(ADDR(.opd) - LOAD_OFFSET)
{ *(.opd) }
/* Initialization code and data: */
. = ALIGN(PAGE_SIZE);
__init_begin = .;
- .init.text : AT(ADDR(.init.text) - PAGE_OFFSET)
+ .init.text : AT(ADDR(.init.text) - LOAD_OFFSET)
{
_sinittext = .;
*(.init.text)
_einittext = .;
}
- .init.data : AT(ADDR(.init.data) - PAGE_OFFSET)
+ .init.data : AT(ADDR(.init.data) - LOAD_OFFSET)
{ *(.init.data) }
- .init.ramfs : AT(ADDR(.init.ramfs) - PAGE_OFFSET)
+ .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET)
{
__initramfs_start = .;
*(.init.ramfs)
@@ -113,19 +114,19 @@
}
. = ALIGN(16);
- .init.setup : AT(ADDR(.init.setup) - PAGE_OFFSET)
+ .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET)
{
__setup_start = .;
*(.init.setup)
__setup_end = .;
}
- __param : AT(ADDR(__param) - PAGE_OFFSET)
+ __param : AT(ADDR(__param) - LOAD_OFFSET)
{
__start___param = .;
*(__param)
__stop___param = .;
}
- .initcall.init : AT(ADDR(.initcall.init) - PAGE_OFFSET)
+ .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET)
{
__initcall_start = .;
*(.initcall1.init)
@@ -138,17 +139,17 @@
__initcall_end = .;
}
__con_initcall_start = .;
- .con_initcall.init : AT(ADDR(.con_initcall.init) - PAGE_OFFSET)
+ .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET)
{ *(.con_initcall.init) }
__con_initcall_end = .;
. = ALIGN(PAGE_SIZE);
__init_end = .;
/* The initial task and kernel stack */
- .data.init_task : AT(ADDR(.data.init_task) - PAGE_OFFSET)
+ .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET)
{ *(.data.init_task) }
- .data.page_aligned : AT(ADDR(.data.page_aligned) - PAGE_OFFSET)
+ .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET)
{ *(__special_page_section)
__start_gate_section = .;
*(.text.gate)
@@ -156,13 +157,13 @@
}
. = ALIGN(SMP_CACHE_BYTES);
- .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - PAGE_OFFSET)
+ .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET)
{ *(.data.cacheline_aligned) }
/* Per-cpu data: */
. = ALIGN(PERCPU_PAGE_SIZE);
__phys_per_cpu_start = .;
- .data.percpu PERCPU_ADDR : AT(__phys_per_cpu_start - PAGE_OFFSET)
+ .data.percpu PERCPU_ADDR : AT(__phys_per_cpu_start - LOAD_OFFSET)
{
__per_cpu_start = .;
*(.data.percpu)
@@ -170,24 +171,24 @@
}
. = __phys_per_cpu_start + PERCPU_PAGE_SIZE; /* ensure percpu data fits into percpu page size */
- .data : AT(ADDR(.data) - PAGE_OFFSET)
+ .data : AT(ADDR(.data) - LOAD_OFFSET)
{ *(.data) *(.gnu.linkonce.d*) CONSTRUCTORS }
. = ALIGN(16);
__gp = . + 0x200000; /* gp must be 16-byte aligned for exc. table */
- .got : AT(ADDR(.got) - PAGE_OFFSET)
+ .got : AT(ADDR(.got) - LOAD_OFFSET)
{ *(.got.plt) *(.got) }
/* We want the small data sections together, so single-instruction offsets
can access them all, and initialized data all before uninitialized, so
we can shorten the on-disk segment size. */
- .sdata : AT(ADDR(.sdata) - PAGE_OFFSET)
+ .sdata : AT(ADDR(.sdata) - LOAD_OFFSET)
{ *(.sdata) }
_edata = .;
_bss = .;
- .sbss : AT(ADDR(.sbss) - PAGE_OFFSET)
+ .sbss : AT(ADDR(.sbss) - LOAD_OFFSET)
{ *(.sbss) *(.scommon) }
- .bss : AT(ADDR(.bss) - PAGE_OFFSET)
+ .bss : AT(ADDR(.bss) - LOAD_OFFSET)
{ *(.bss) *(COMMON) }
_end = .;
diff -ru l2569-mosberger/include/asm-ia64/pgtable.h l2569-aegl/include/asm-ia64/pgtable.h
--- l2569-mosberger/include/asm-ia64/pgtable.h Thu May 15 10:13:00 2003
+++ l2569-aegl/include/asm-ia64/pgtable.h Thu May 15 11:45:02 2003
@@ -207,7 +207,7 @@
#define RGN_MAP_LIMIT ((1UL << (4*PAGE_SHIFT - 12)) - PAGE_SIZE) /* per region addr limit */
#define RGN_KERNEL 7
-#define VMALLOC_START (0xa000000000000000 + 3*PERCPU_PAGE_SIZE)
+#define VMALLOC_START 0xa000000200000000
#define VMALLOC_VMADDR(x) ((unsigned long)(x))
#ifdef CONFIG_VIRTUAL_MEM_MAP
# define VMALLOC_END_INIT (0xa000000000000000 + (1UL << (4*PAGE_SHIFT - 9)))
@@ -450,7 +450,8 @@
* for zero-mapped memory areas etc..
*/
extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
-#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+extern struct page *zero_page_memmap_ptr;
+#define ZERO_PAGE(vaddr) (zero_page_memmap_ptr)
/* We provide our own get_unmapped_area to cope with VA holes for userland */
#define HAVE_ARCH_UNMAPPED_AREA
@@ -481,7 +482,6 @@
*/
#define KERNEL_TR_PAGE_SHIFT _PAGE_SIZE_64M
#define KERNEL_TR_PAGE_SIZE (1 << KERNEL_TR_PAGE_SHIFT)
-#define KERNEL_TR_PAGE_NUM ((KERNEL_START - PAGE_OFFSET) / KERNEL_TR_PAGE_SIZE)
/*
* No page table caches to initialise
diff -ru l2569-mosberger/include/asm-ia64/processor.h l2569-aegl/include/asm-ia64/processor.h
--- l2569-mosberger/include/asm-ia64/processor.h Thu May 15 10:13:00 2003
+++ l2569-aegl/include/asm-ia64/processor.h Thu May 15 13:42:17 2003
@@ -929,6 +929,18 @@
return result;
}
+/*
+ * Take a mapped kernel address and return the equivalent address
+ * in the region 7 identity mapped virtual area.
+ */
+static inline void *
+ia64_imva (void *addr)
+{
+ void *result;
+ asm ("tpa %0=%1" : "=r"(result) : "r"(addr));
+ return __va(result);
+}
+
#define ARCH_HAS_PREFETCH
#define ARCH_HAS_PREFETCHW
#define ARCH_HAS_SPINLOCK_PREFETCH
diff -ru l2569-mosberger/include/asm-ia64/system.h l2569-aegl/include/asm-ia64/system.h
--- l2569-mosberger/include/asm-ia64/system.h Thu May 15 10:13:00 2003
+++ l2569-aegl/include/asm-ia64/system.h Thu May 15 11:45:02 2003
@@ -19,7 +19,7 @@
#include <asm/pal.h>
#include <asm/percpu.h>
-#define KERNEL_START (PAGE_OFFSET + 68*1024*1024)
+#define KERNEL_START (0xA000000100000000)
/* 0xa000000000000000 - 0xa000000000000000+PERCPU_MAX_SIZE remain unmapped */
#define PERCPU_ADDR (0xa000000000000000 + PERCPU_PAGE_SIZE)
^ permalink raw reply [flat|nested] 60+ messages in thread* [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (47 preceding siblings ...)
2003-05-15 21:43 ` Luck, Tony
@ 2003-05-16 22:33 ` Luck, Tony
2003-05-16 22:47 ` David Mosberger
` (9 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: Luck, Tony @ 2003-05-16 22:33 UTC (permalink / raw)
To: linux-ia64
[-- Attachment #1: Type: text/plain, Size: 657 bytes --]
Here's a first crack at the missing fs/proc/kcore.c
part of relocating the kernel.
Not as pretty as I'd hoped ... just having your kernel
at a lower address than your modules isn't sufficient
to fit with what the code currently does (which is to
assume that your kernel is in the 1-1 area above
PAGE_OFFSET).
Perhaps it might be cleaner to add the kernel to the
vmlist (as somebody suggested before)? That would get
rid of almost all the changes except one ... we'd still
need to set KCORE_BASE to the start address of region 5,
so any suggestions on how to do that in a cleaner way
than #ifdef CONFIG_IA64 gratefully accepted.
-Tony
[-- Attachment #2: kcore.c --]
[-- Type: application/octet-stream, Size: 11790 bytes --]
/*
* fs/proc/kcore.c kernel ELF/AOUT core dumper
*
* Modelled on fs/exec.c:aout_core_dump()
* Jeremy Fitzhardinge <jeremy@sw.oz.au>
* ELF version written by David Howells <David.Howells@nexor.co.uk>
* Modified and incorporated into 2.3.x by Tigran Aivazian <tigran@veritas.com>
* Support to dump vmalloc'd areas (ELF only), Tigran Aivazian <tigran@veritas.com>
* Safe accesses to vmalloc/direct-mapped discontiguous areas, Kanoj Sarcar <kanoj@sgi.com>
*/
#include <linux/config.h>
#include <linux/mm.h>
#include <linux/proc_fs.h>
#include <linux/user.h>
#include <linux/a.out.h>
#include <linux/elf.h>
#include <linux/elfcore.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <asm/uaccess.h>
#include <asm/io.h>
/*
 * Open handler for /proc/kcore: the open succeeds only when
 * capable(CAP_SYS_RAWIO) is true; otherwise -EPERM is returned.
 */
static int open_kcore(struct inode * inode, struct file * filp)
{
	if (!capable(CAP_SYS_RAWIO))
		return -EPERM;

	return 0;
}
static ssize_t read_kcore(struct file *, char *, size_t, loff_t *);
/*
 * File operations for /proc/kcore.  Only open and read are provided; every
 * other member is left zero-initialised.  C99 designated initializers are
 * used instead of the obsolete GNU "field:" form.
 */
struct file_operations proc_kcore_operations = {
	.read	= read_kcore,
	.open	= open_kcore,
};
#ifdef CONFIG_KCORE_AOUT
/*
 * a.out flavour of the /proc/kcore read handler.
 *
 * The file is presented as three consecutive regions:
 *   1. a 'struct user' header built on the stack for every call,
 *   2. zero fill up to offset PAGE_SIZE + FIRST_MAPPED,
 *   3. kernel memory, where file offset p maps to the kernel virtual
 *      address PAGE_OFFSET + p - PAGE_SIZE.
 *
 * Returns the number of bytes copied to 'buf' (0 once *ppos is at or past
 * virt_to_phys(high_memory)), or -EFAULT if a copy to user space fails.
 * *ppos is advanced by the number of bytes returned.
 */
static ssize_t read_kcore(struct file *file, char *buf, size_t count, loff_t *ppos)
{
unsigned long long p = *ppos, memsize;
ssize_t read;
ssize_t count1;
char * pnt;
struct user dump;
#if defined (__i386__) || defined (__mc68000__) || defined(__x86_64__)
# define FIRST_MAPPED PAGE_SIZE /* we don't have page 0 mapped on x86.. */
#else
# define FIRST_MAPPED 0
#endif
/* Build the a.out core header describing the dump. */
memset(&dump, 0, sizeof(struct user));
dump.magic = CMAGIC;
dump.u_dsize = (virt_to_phys(high_memory) >> PAGE_SHIFT);
#if defined (__i386__) || defined(__x86_64__)
dump.start_code = PAGE_OFFSET;
#endif
#ifdef __alpha__
dump.start_data = PAGE_OFFSET;
#endif
/* The readable size is bounded by the physical address of high_memory. */
memsize = virt_to_phys(high_memory);
if (p >= memsize)
return 0;
/* Clamp the request so it does not run past the end of the file. */
if (count > memsize - p)
count = memsize - p;
read = 0;
/* Region 1: the part of the request that overlaps the 'struct user' header. */
if (p < sizeof(struct user) && count > 0) {
count1 = count;
if (p + count1 > sizeof(struct user))
count1 = sizeof(struct user)-p;
pnt = (char *) &dump + p;
if (copy_to_user(buf,(void *) pnt, count1))
return -EFAULT;
buf += count1;
p += count1;
count -= count1;
read += count1;
}
/* Region 2: zero fill between the header and PAGE_SIZE + FIRST_MAPPED. */
if (count > 0 && p < PAGE_SIZE + FIRST_MAPPED) {
count1 = PAGE_SIZE + FIRST_MAPPED - p;
if (count1 > count)
count1 = count;
if (clear_user(buf, count1))
return -EFAULT;
buf += count1;
p += count1;
count -= count1;
read += count1;
}
/* Region 3: whatever is left comes straight from kernel memory. */
if (count > 0) {
if (copy_to_user(buf, (void *) (PAGE_OFFSET+p-PAGE_SIZE), count))
return -EFAULT;
read += count;
}
*ppos += read;
return read;
}
#else /* CONFIG_KCORE_AOUT */
#define roundup(x, y) ((((x)+((y)-1))/(y))*(y))
/*
 * An ELF note in memory: the description of one note before storenote()
 * serialises it into the header buffer.
 */
struct memelfnote
{
const char *name; /* note name; storenote() measures it with strlen() and copies it */
int type; /* written to the note header as n_type */
unsigned int datasz; /* number of bytes at 'data'; written as n_descsz */
void *data; /* payload copied into the note after the name */
};
extern char saved_command_line[];
/*
 * Work out the apparent size of /proc/kcore.
 *
 * *num_vma    receives the number of entries found on vmlist.
 * *elf_buflen receives the page-aligned number of bytes reserved for the
 *             ELF header, program headers and notes (exactly PAGE_SIZE
 *             when vmlist is empty).
 *
 * read_kcore() calls this with vmlist_lock read-held.
 *
 * NOTE(review): 'limit' starts out as a size relative to PAGE_OFFSET but is
 * then compared against absolute end addresses of vmlist entries; the final
 * subtraction of PAGE_OFFSET presumably relies on at least one entry ending
 * above that initial value -- confirm before changing.
 */
static size_t get_kcore_size(int *num_vma, size_t *elf_buflen)
{
	struct vm_struct *area;
	size_t limit;
	size_t hdr_bytes;
	int count = 0;

	*num_vma = 0;
	limit = (size_t)high_memory - PAGE_OFFSET + PAGE_SIZE;

	if (vmlist == NULL) {
		*elf_buflen = PAGE_SIZE;
		return limit;
	}

	for (area = vmlist; area != NULL; area = area->next) {
		size_t area_end = (size_t)area->addr + area->size;

		if (area_end > limit)
			limit = area_end;
		count++;
	}
	*num_vma = count;

	/* ELF header + (note, direct-map and per-area) phdrs + three notes */
	hdr_bytes = sizeof(struct elfhdr) +
		    (count + 2) * sizeof(struct elf_phdr) +
		    3 * sizeof(struct memelfnote);
	*elf_buflen = PAGE_ALIGN(hdr_bytes);

	return limit - PAGE_OFFSET + *elf_buflen;
}
/*****************************************************************************/
/*
 * Size of one ELF note: the fixed struct elf_note header plus the name
 * length and the data length, each rounded up to a multiple of 4.
 */
static int notesize(struct memelfnote *en)
{
	return sizeof(struct elf_note)
		+ roundup(strlen(en->name), 4)
		+ roundup(en->datasz, 4);
}
/*****************************************************************************/
/*
* store a note in the header buffer
*/
static char *storenote(struct memelfnote *men, char *bufp)
{
struct elf_note en;
#define DUMP_WRITE(addr,nr) do { memcpy(bufp,addr,nr); bufp += nr; } while(0)
en.n_namesz = strlen(men->name);
en.n_descsz = men->datasz;
en.n_type = men->type;
DUMP_WRITE(&en, sizeof(en));
DUMP_WRITE(men->name, en.n_namesz);
/* XXX - cast from long long to long to avoid need for libgcc.a */
bufp = (char*) roundup((unsigned long)bufp,4);
DUMP_WRITE(men->data, men->datasz);
bufp = (char*) roundup((unsigned long)bufp,4);
#undef DUMP_WRITE
return bufp;
} /* end storenote() */
/*
 * store an ELF coredump header in the supplied buffer
 *
 * bufp    - header buffer; read_kcore() passes a zero-filled buffer of
 *           'dataoff' bytes
 * num_vma - the number of elements in vmlist
 * dataoff - file offset at which the memory image starts, i.e. the file
 *           offset that corresponds to virtual address PAGE_OFFSET
 *
 * Layout written: ELF header, one PT_NOTE program header, one PT_LOAD
 * program header for PAGE_OFFSET..high_memory, one PT_LOAD program header
 * per non-ioremap vmlist entry, then three notes (NT_PRSTATUS, NT_PRPSINFO,
 * NT_TASKSTRUCT).
 *
 * vmlist is walked here; read_kcore() calls this with vmlist_lock read-held.
 */
static void elf_kcore_store_hdr(char *bufp, int num_vma, int dataoff)
{
	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
	struct elf_prpsinfo prpsinfo;	/* NT_PRPSINFO */
	struct elf_phdr *nhdr, *phdr;
	struct elfhdr *elf;
	struct memelfnote notes[3];
	off_t offset = 0;		/* running file offset of the write position */
	struct vm_struct *m;

	/* setup ELF header */
	elf = (struct elfhdr *) bufp;
	bufp += sizeof(struct elfhdr);
	offset += sizeof(struct elfhdr);
	memcpy(elf->e_ident, ELFMAG, SELFMAG);
	elf->e_ident[EI_CLASS]	= ELF_CLASS;
	elf->e_ident[EI_DATA]	= ELF_DATA;
	elf->e_ident[EI_VERSION]= EV_CURRENT;
	memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
	elf->e_type	= ET_CORE;
	elf->e_machine	= ELF_ARCH;
	elf->e_version	= EV_CURRENT;
	elf->e_entry	= 0;
	elf->e_phoff	= sizeof(struct elfhdr);
	elf->e_shoff	= 0;
	elf->e_flags	= 0;
	elf->e_ehsize	= sizeof(struct elfhdr);
	elf->e_phentsize= sizeof(struct elf_phdr);
	elf->e_phnum	= 2 + num_vma;
	elf->e_shentsize= 0;
	elf->e_shnum	= 0;
	elf->e_shstrndx	= 0;

	/*
	 * setup ELF PT_NOTE program header; p_offset and p_filesz are
	 * filled in below once the position and size of the notes are known
	 */
	nhdr = (struct elf_phdr *) bufp;
	bufp += sizeof(struct elf_phdr);
	offset += sizeof(struct elf_phdr);
	nhdr->p_type	= PT_NOTE;
	nhdr->p_offset	= 0;
	nhdr->p_vaddr	= 0;
	nhdr->p_paddr	= 0;
	nhdr->p_filesz	= 0;
	nhdr->p_memsz	= 0;
	nhdr->p_flags	= 0;
	nhdr->p_align	= 0;

	/* setup ELF PT_LOAD program header for the
	 * virtual range PAGE_OFFSET -> high_memory */
	phdr = (struct elf_phdr *) bufp;
	bufp += sizeof(struct elf_phdr);
	offset += sizeof(struct elf_phdr);
	phdr->p_type	= PT_LOAD;
	phdr->p_flags	= PF_R|PF_W|PF_X;
	phdr->p_offset	= dataoff;
	phdr->p_vaddr	= PAGE_OFFSET;
	phdr->p_paddr	= __pa(PAGE_OFFSET);
	phdr->p_filesz	= phdr->p_memsz = ((unsigned long)high_memory - PAGE_OFFSET);
	phdr->p_align	= PAGE_SIZE;

	/* setup ELF PT_LOAD program header for every vmalloc'd area */
	for (m=vmlist; m; m=m->next) {
		if (m->flags & VM_IOREMAP) /* don't dump ioremap'd stuff! (TA) */
			continue;

		phdr = (struct elf_phdr *) bufp;
		bufp += sizeof(struct elf_phdr);
		offset += sizeof(struct elf_phdr);

		phdr->p_type	= PT_LOAD;
		phdr->p_flags	= PF_R|PF_W|PF_X;
		phdr->p_offset	= (size_t)m->addr - PAGE_OFFSET + dataoff;
		phdr->p_vaddr	= (size_t)m->addr;
		phdr->p_paddr	= __pa(m->addr);
		phdr->p_filesz	= phdr->p_memsz	= m->size;
		phdr->p_align	= PAGE_SIZE;
	}

	/*
	 * Set up the notes in similar form to SVR4 core dumps made
	 * with info from their /proc.
	 */
	nhdr->p_offset	= offset;

	/* set up the process status */
	notes[0].name = "CORE";
	notes[0].type = NT_PRSTATUS;
	notes[0].datasz = sizeof(struct elf_prstatus);
	notes[0].data = &prstatus;

	memset(&prstatus, 0, sizeof(struct elf_prstatus));

	nhdr->p_filesz	= notesize(&notes[0]);
	bufp = storenote(&notes[0], bufp);

	/* set up the process info */
	notes[1].name	= "CORE";
	notes[1].type	= NT_PRPSINFO;
	notes[1].datasz	= sizeof(struct elf_prpsinfo);
	notes[1].data	= &prpsinfo;

	memset(&prpsinfo, 0, sizeof(struct elf_prpsinfo));
	prpsinfo.pr_state	= 0;
	prpsinfo.pr_sname	= 'R';
	prpsinfo.pr_zomb	= 0;

	strcpy(prpsinfo.pr_fname, "vmlinux");
	strncpy(prpsinfo.pr_psargs, saved_command_line, ELF_PRARGSZ);

	/* accumulate: the PT_NOTE segment spans all three notes */
	nhdr->p_filesz	+= notesize(&notes[1]);
	bufp = storenote(&notes[1], bufp);

	/* set up the task structure */
	notes[2].name	= "CORE";
	notes[2].type	= NT_TASKSTRUCT;
	notes[2].datasz	= sizeof(struct task_struct);
	notes[2].data	= current;

	nhdr->p_filesz	+= notesize(&notes[2]);
	bufp = storenote(&notes[2], bufp);

} /* end elf_kcore_store_hdr() */
/*****************************************************************************/
/*
 * read from the ELF header and then kernel memory
 *
 * The file consists of 'elf_buflen' bytes of ELF header (rebuilt on every
 * call that touches it), followed by kernel virtual memory, where file
 * offset 'elf_buflen' corresponds to virtual address PAGE_OFFSET.
 *
 * Returns the total number of bytes copied to 'buffer' (0 when buflen is 0
 * or *fpos is at/after the end of the file), -ENOMEM if a bounce buffer
 * cannot be allocated, or -EFAULT if a copy to user space fails.  *fpos is
 * advanced by the number of bytes copied.  proc_root_kcore->size is
 * refreshed on every call.
 */
static ssize_t read_kcore(struct file *file, char *buffer, size_t buflen, loff_t *fpos)
{
	ssize_t acc = 0;		/* bytes delivered so far in this call */
	size_t size, tsz;
	size_t elf_buflen;
	int num_vma;
	unsigned long start;

	read_lock(&vmlist_lock);
	proc_root_kcore->size = size = get_kcore_size(&num_vma, &elf_buflen);
	if (buflen == 0 || *fpos >= size) {
		read_unlock(&vmlist_lock);
		return 0;
	}

	/* trim buflen to not go beyond EOF */
	if (buflen > size - *fpos)
		buflen = size - *fpos;

	/* construct an ELF core header if we'll need some of it */
	if (*fpos < elf_buflen) {
		char * elf_buf;

		tsz = elf_buflen - *fpos;
		if (buflen < tsz)
			tsz = buflen;
		/* vmlist_lock is still held across this allocation */
		elf_buf = kmalloc(elf_buflen, GFP_ATOMIC);
		if (!elf_buf) {
			read_unlock(&vmlist_lock);
			return -ENOMEM;
		}
		memset(elf_buf, 0, elf_buflen);
		elf_kcore_store_hdr(elf_buf, num_vma, elf_buflen);
		read_unlock(&vmlist_lock);
		if (copy_to_user(buffer, elf_buf + *fpos, tsz)) {
			kfree(elf_buf);
			return -EFAULT;
		}
		kfree(elf_buf);
		buflen -= tsz;
		*fpos += tsz;
		buffer += tsz;
		acc += tsz;

		/* leave now if filled buffer already */
		if (buflen == 0)
			return acc;
	} else
		read_unlock(&vmlist_lock);

	/* where page 0 not mapped, write zeros into buffer */
#if defined (__i386__) || defined (__mc68000__) || defined(__x86_64__)
	if (*fpos < PAGE_SIZE + elf_buflen) {
		/* work out how much to clear */
		tsz = PAGE_SIZE + elf_buflen - *fpos;
		if (buflen < tsz)
			tsz = buflen;

		/* write zeros to buffer */
		if (clear_user(buffer, tsz))
			return -EFAULT;
		buflen -= tsz;
		*fpos += tsz;
		buffer += tsz;
		acc += tsz;

		/*
		 * leave now if filled buffer already; report everything
		 * delivered in this call (header bytes included), not just
		 * the size of the zero fill
		 */
		if (buflen == 0)
			return acc;
	}
#endif

	/*
	 * Fill the remainder of the buffer from kernel VM space.
	 * We said in the ELF header that the data which starts
	 * at 'elf_buflen' is virtual address PAGE_OFFSET. --rmk
	 */
	start = PAGE_OFFSET + (*fpos - elf_buflen);

	/* first chunk runs only to the end of the current page */
	if ((tsz = (PAGE_SIZE - (start & ~PAGE_MASK))) > buflen)
		tsz = buflen;

	while (buflen) {
		if ((start >= VMALLOC_START) && (start < VMALLOC_END)) {
			/*
			 * vmalloc range: copy the parts of this chunk that
			 * are covered by vmlist entries into a zeroed bounce
			 * buffer; gaps and ioremap'd areas read back as zeros
			 */
			char * elf_buf;
			struct vm_struct *m;
			unsigned long curstart = start;
			unsigned long cursize = tsz;

			elf_buf = kmalloc(tsz, GFP_KERNEL);
			if (!elf_buf)
				return -ENOMEM;
			memset(elf_buf, 0, tsz);

			read_lock(&vmlist_lock);
			for (m=vmlist; m && cursize; m=m->next) {
				unsigned long vmstart;
				unsigned long vmsize;
				unsigned long msize = m->size - PAGE_SIZE;

				if (((unsigned long)m->addr + msize) <
								curstart)
					continue;
				if ((unsigned long)m->addr > (curstart +
								cursize))
					break;
				vmstart = (curstart < (unsigned long)m->addr ?
					(unsigned long)m->addr : curstart);
				if (((unsigned long)m->addr + msize) >
							(curstart + cursize))
					vmsize = curstart + cursize - vmstart;
				else
					vmsize = (unsigned long)m->addr +
							msize - vmstart;
				curstart = vmstart + vmsize;
				cursize -= vmsize;
				/* don't dump ioremap'd stuff! (TA) */
				if (m->flags & VM_IOREMAP)
					continue;
				memcpy(elf_buf + (vmstart - start),
					(char *)vmstart, vmsize);
			}
			read_unlock(&vmlist_lock);
			if (copy_to_user(buffer, elf_buf, tsz)) {
				kfree(elf_buf);
				return -EFAULT;
			}
			kfree(elf_buf);
		} else if ((start > PAGE_OFFSET) && (start <
						(unsigned long)high_memory)) {
			/*
			 * direct-mapped range
			 * NOTE(review): the strict '>' sends a chunk starting
			 * exactly at PAGE_OFFSET to the zero-fill branch
			 * below -- confirm this is intended.
			 */
			if (kern_addr_valid(start)) {
				if (copy_to_user(buffer, (char *)start, tsz))
					return -EFAULT;
			} else {
				if (clear_user(buffer, tsz))
					return -EFAULT;
			}
		} else {
			/* anything else reads back as zeros */
			if (clear_user(buffer, tsz))
				return -EFAULT;
		}
		buflen -= tsz;
		*fpos += tsz;
		buffer += tsz;
		acc += tsz;
		start += tsz;
		tsz = (buflen > PAGE_SIZE ? PAGE_SIZE : buflen);
	}

	return acc;
}
#endif /* CONFIG_KCORE_AOUT */
^ permalink raw reply [flat|nested] 60+ messages in thread* [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (48 preceding siblings ...)
2003-05-16 22:33 ` Luck, Tony
@ 2003-05-16 22:47 ` David Mosberger
2003-05-16 22:54 ` [Linux-ia64] " Luck, Tony
` (8 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: David Mosberger @ 2003-05-16 22:47 UTC (permalink / raw)
To: linux-ia64
>>>>> On Fri, 16 May 2003 15:33:30 -0700, "Luck, Tony" <tony.luck@intel.com> said:
Tony> Here's a first crack at the missing fs/proc/kcore.c part of
Tony> relocating the kernel.
Tony> Not as pretty as I'd hoped ... just having your kernel at a
Tony> lower address than your modules isn't sufficient to fit with
Tony> what the code currently does (which is to assume that your
Tony> kernel is in the 1-1 area above PAGE_OFFSET).
Tony> Perhaps it might be cleaner to add the kernel to the vmlist
Tony> (as somebody suggested before)? That would get rid of almost
Tony> all the changes except one ... we'd still need to set
Tony> KCORE_BASE to the start address of region 5, so any
Tony> suggestions on how to do that in a cleaner way than #ifdef
Tony> CONFIG_IA64 gratefully accepted.
Well, the patch looks fine to me, so I'm probably the wrong person to
ask. Might be good to bring this up on linux-kernel.
Was there a particular reason to send the full file instead of just a
diff? Also, your file seems to revert the ANSI 99 initializer changes
for no good reason.
--david
^ permalink raw reply [flat|nested] 60+ messages in thread* [Linux-ia64] RE: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (49 preceding siblings ...)
2003-05-16 22:47 ` David Mosberger
@ 2003-05-16 22:54 ` Luck, Tony
2003-05-16 22:58 ` David Mosberger
` (7 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: Luck, Tony @ 2003-05-16 22:54 UTC (permalink / raw)
To: linux-ia64
[-- Attachment #1: Type: text/plain, Size: 509 bytes --]
> Well, the patch looks fine to me, so I'm probably the wrong person to
> ask. Might be good to bring this up on linux-kernel.
>
> Was there a particular reason to send the full file instead of just a
> diff? Also, your file seems to revert the ANSI 99 initializer changes
> for no good reason.
It's the wrong file ... that's from last November ... it just happened
to be in the folder on my PC where I dump things to attach to e-mail. I
meant to pick up the kcore.diff (now attached).
-Tony
[-- Attachment #2: kcore.diff --]
[-- Type: application/octet-stream, Size: 2375 bytes --]
diff -ru l2569-mosberger/fs/proc/kcore.c l2569-aegl/fs/proc/kcore.c
--- l2569-mosberger/fs/proc/kcore.c Thu May 15 10:12:56 2003
+++ l2569-aegl/fs/proc/kcore.c Fri May 16 15:00:17 2003
@@ -99,7 +99,13 @@
}
#else /* CONFIG_KCORE_AOUT */
+#ifdef CONFIG_IA64
+#define KCORE_BASE 0xA000000000000000UL
+#else
#define KCORE_BASE PAGE_OFFSET
+#endif
+
+extern char _stext[], _end[];
#define roundup(x, y) ((((x)+((y)-1))/(y))*(y))
@@ -133,7 +139,7 @@
*num_vma = *num_vma + 1;
}
*elf_buflen = sizeof(struct elfhdr) +
- (*num_vma + 2)*sizeof(struct elf_phdr) +
+ (*num_vma + 3)*sizeof(struct elf_phdr) +
3 * sizeof(struct memelfnote);
*elf_buflen = PAGE_ALIGN(*elf_buflen);
return size + *elf_buflen;
@@ -214,7 +220,7 @@
elf->e_flags = 0;
elf->e_ehsize = sizeof(struct elfhdr);
elf->e_phentsize= sizeof(struct elf_phdr);
- elf->e_phnum = 2 + num_vma;
+ elf->e_phnum = 3 + num_vma;
elf->e_shentsize= 0;
elf->e_shnum = 0;
elf->e_shstrndx = 0;
@@ -245,6 +251,27 @@
phdr->p_filesz = phdr->p_memsz = ((unsigned long)high_memory - PAGE_OFFSET);
phdr->p_align = PAGE_SIZE;
+ phdr = (struct elf_phdr *) bufp;
+ bufp += sizeof(struct elf_phdr);
+ offset += sizeof(struct elf_phdr);
+ if ((unsigned long)_end < PAGE_OFFSET) {
+ phdr->p_type = PT_LOAD;
+ phdr->p_flags = PF_R|PF_W|PF_X;
+ phdr->p_offset = (unsigned long)_stext - KCORE_BASE + dataoff;
+ phdr->p_vaddr = (unsigned long)_stext;
+ phdr->p_paddr = ia64_tpa((unsigned long)_stext);
+ phdr->p_filesz = phdr->p_memsz = _end - _stext;
+ phdr->p_align = PAGE_SIZE;
+ } else {
+ phdr->p_type = PT_NULL;
+ phdr->p_flags = 0;
+ phdr->p_offset = 0;
+ phdr->p_vaddr = 0;
+ phdr->p_paddr = 0;
+ phdr->p_filesz = 0;
+ phdr->p_align = 0;
+ }
+
/* setup ELF PT_LOAD program header for every vmalloc'd area */
for (m=vmlist; m; m=m->next) {
if (m->flags & VM_IOREMAP) /* don't dump ioremap'd stuff! (TA) */
@@ -444,6 +471,15 @@
if (kern_addr_valid(start)) {
if (copy_to_user(buffer, (char *)start, tsz))
return -EFAULT;
+ } else {
+ if (clear_user(buffer, tsz))
+ return -EFAULT;
+ }
+ } else if ((start > (unsigned long)_stext) && (start <
+ (unsigned long)_end)) {
+ if (kern_addr_valid(start)) {
+ if (copy_to_user(buffer, (char *)start, tsz))
+ return -EFAULT;
} else {
if (clear_user(buffer, tsz))
return -EFAULT;
^ permalink raw reply [flat|nested] 60+ messages in thread* Re: [Linux-ia64] RE: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (50 preceding siblings ...)
2003-05-16 22:54 ` [Linux-ia64] " Luck, Tony
@ 2003-05-16 22:58 ` David Mosberger
2003-05-19 17:57 ` Luck, Tony
` (6 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: David Mosberger @ 2003-05-16 22:58 UTC (permalink / raw)
To: linux-ia64
>>>>> On Fri, 16 May 2003 15:54:27 -0700, "Luck, Tony" <tony.luck@intel.com> said:
Tony> It's the wrong file ... that's from last November ... it just
Tony> happened to be in the folder on my PC where I dump things to
Tony> attach to e-mail. I meant to pick up the kcore.diff (now
Tony> attached).
Ah, I see.
Definitely something to be discussed on the lkml. I suspect it would
be better to avoid creating the PT_NULL header when it's not needed.
--david
^ permalink raw reply [flat|nested] 60+ messages in thread* RE: [Linux-ia64] RE: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (51 preceding siblings ...)
2003-05-16 22:58 ` David Mosberger
@ 2003-05-19 17:57 ` Luck, Tony
2003-05-19 18:02 ` Jesse Barnes
` (5 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: Luck, Tony @ 2003-05-19 17:57 UTC (permalink / raw)
To: linux-ia64
[-- Attachment #1: Type: text/plain, Size: 1058 bytes --]
Here's what the kcore change looks like if I take the suggestion
that Andi Kleen made on LKML
http://marc.theaimsgroup.com/?l=linux-kernel&m=103556903730989&w=2
to add the kernel to the vmlist. Combined with the KCORE_BASE
change (which is 98% included) we avoid the negative addresses
that required a bunch more changes in Andi's patch.
I've juggled the addresses around again, moving the kernel up to
0xA000004000000000 and VMALLOC_START back down to 0xA000000000030000
so that the entry for the kernel goes on the *end* of the vmlist,
so we don't have to uselessly step over it on every call to vmalloc().
I picked that kernel start address as it is half-way between
VMALLOC_START and VMALLOC_END for a PAGE_SIZE=4k kernel. There
are many alternatives ... choose a different one (between VMALLOC_START
and VMALLOC_END) if you come up with a good reason for a different
address.
This builds and boots on Tiger, and now I can run:
# gdb vmlinux /proc/kcore
(gdb) x/s log_buf
and see the contents of kernel memory.
-Tony
[-- Attachment #2: kcore2.diff --]
[-- Type: application/octet-stream, Size: 2672 bytes --]
diff -ru l2569-mosberger/arch/ia64/kernel/setup.c l2569-aegl/arch/ia64/kernel/setup.c
--- l2569-mosberger/arch/ia64/kernel/setup.c Fri May 16 11:46:57 2003
+++ l2569-aegl/arch/ia64/kernel/setup.c Fri May 16 17:11:18 2003
@@ -32,6 +32,7 @@
#include <linux/tty.h>
#include <linux/efi.h>
#include <linux/initrd.h>
+#include <linux/vmalloc.h>
#include <asm/ia32.h>
#include <asm/page.h>
@@ -47,7 +48,7 @@
# error "struct cpuinfo_ia64 too big!"
#endif
-extern char _end;
+extern char _stext, _end;
#ifdef CONFIG_SMP
unsigned long __per_cpu_offset[NR_CPUS];
@@ -393,6 +394,18 @@
{
extern unsigned long ia64_iobase;
unsigned long phys_iobase;
+ static struct vm_struct kern;
+ unsigned long s, e;
+
+ /* Add entry to vmlist for the kernel (round out to how much is mapped) */
+ s = (unsigned long)&_stext;
+ s &= ~(KERNEL_TR_PAGE_SIZE-1);
+ e = (unsigned long)&_end;
+ e = (e + KERNEL_TR_PAGE_SIZE-1) & ~(KERNEL_TR_PAGE_SIZE-1);
+ kern.addr = (void *)s;
+ kern.size = e - s;
+ kern.next = vmlist;
+ vmlist = &kern;
unw_init();
diff -ru l2569-mosberger/fs/proc/kcore.c l2569-aegl/fs/proc/kcore.c
--- l2569-mosberger/fs/proc/kcore.c Thu May 15 10:12:56 2003
+++ l2569-aegl/fs/proc/kcore.c Mon May 19 08:57:17 2003
@@ -99,7 +99,9 @@
}
#else /* CONFIG_KCORE_AOUT */
+#ifndef KCORE_BASE
#define KCORE_BASE PAGE_OFFSET
+#endif
#define roundup(x, y) ((((x)+((y)-1))/(y))*(y))
diff -ru l2569-mosberger/include/asm-ia64/pgtable.h l2569-aegl/include/asm-ia64/pgtable.h
--- l2569-mosberger/include/asm-ia64/pgtable.h Fri May 16 11:46:57 2003
+++ l2569-aegl/include/asm-ia64/pgtable.h Mon May 19 09:04:30 2003
@@ -207,7 +207,8 @@
#define RGN_MAP_LIMIT ((1UL << (4*PAGE_SHIFT - 12)) - PAGE_SIZE) /* per region addr limit */
#define RGN_KERNEL 7
-#define VMALLOC_START 0xa000000200000000
+#define KCORE_BASE 0xa000000000000000
+#define VMALLOC_START (0xa000000000000000 + 3*PERCPU_PAGE_SIZE)
#define VMALLOC_VMADDR(x) ((unsigned long)(x))
#ifdef CONFIG_VIRTUAL_MEM_MAP
# define VMALLOC_END_INIT (0xa000000000000000 + (1UL << (4*PAGE_SHIFT - 9)))
diff -ru l2569-mosberger/include/asm-ia64/system.h l2569-aegl/include/asm-ia64/system.h
--- l2569-mosberger/include/asm-ia64/system.h Fri May 16 11:46:57 2003
+++ l2569-aegl/include/asm-ia64/system.h Mon May 19 09:05:25 2003
@@ -19,7 +19,7 @@
#include <asm/pal.h>
#include <asm/percpu.h>
-#define KERNEL_START (0xa000000100000000)
+#define KERNEL_START (0xa000004000000000)
/* 0xa000000000000000 - 0xa000000000000000+PERCPU_MAX_SIZE remain unmapped */
#define PERCPU_ADDR (0xa000000000000000 + PERCPU_PAGE_SIZE)
^ permalink raw reply [flat|nested] 60+ messages in thread* Re: [Linux-ia64] RE: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (52 preceding siblings ...)
2003-05-19 17:57 ` Luck, Tony
@ 2003-05-19 18:02 ` Jesse Barnes
2003-05-19 18:39 ` David Mosberger
` (4 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: Jesse Barnes @ 2003-05-19 18:02 UTC (permalink / raw)
To: linux-ia64
Btw, last Friday I tested David's bk tree (plus one additional console
driver patch) on an sn2, and it booted to the point where we got lots
of printks, so your head.S stuff seems fine for us so far. I'll let
you know if I run into anything more, but I won't be able to test the
kcore stuff until I have it booting and mounting root.
Thanks,
Jesse
On Mon, May 19, 2003 at 10:57:03AM -0700, Luck, Tony wrote:
> Here's what the kcore change looks like if I take the suggestion
> that Andi Kleen made on LKML
> http://marc.theaimsgroup.com/?l=linux-kernel&m=103556903730989&w=2
> to add the kernel to the vmlist. Combined with the KCORE_BASE
> change (which is 98% included) we avoid the negative addresses
> that required a bunch more changes in Andi's patch.
>
> I've juggled the addresses around again, moving the kernel up to
> 0xA000004000000000 and VMALLOC_START back down to 0xA000000000030000
> so that the entry for the kernel goes on the *end* of the vmlist,
> so we don't have to uselessly step over it on every call to vmalloc().
>
> I picked that kernel start address as it is half-way between
> VMALLOC_START and VMALLOC_END for a PAGE_SIZE=4k kernel. There
> are many alternatives ... choose a different one (between VMALLOC_START
> and VMALLOC_END) if you come up with a good reason for a different
> address.
>
> This builds and boots on Tiger, and now I can run:
>
> # gdb vmlinux /proc/kcore
> (gdb) x/s log_buf
>
> and see the contents of kernel memory.
>
> -Tony
^ permalink raw reply [flat|nested] 60+ messages in thread* RE: [Linux-ia64] RE: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (53 preceding siblings ...)
2003-05-19 18:02 ` Jesse Barnes
@ 2003-05-19 18:39 ` David Mosberger
2003-05-19 19:07 ` Luck, Tony
` (3 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: David Mosberger @ 2003-05-19 18:39 UTC (permalink / raw)
To: linux-ia64
>>>>> On Mon, 19 May 2003 10:57:03 -0700, "Luck, Tony" <tony.luck@intel.com> said:
Tony> I've juggled the addresses around again, moving the kernel up to
Tony> 0xA000004000000000 and VMALLOC_START back down to 0xA000000000030000
Tony> so that the entry for the kernel goes on the *end* of the vmlist,
Tony> so we don't have to uselessly step over it on every call to vmalloc().
I don't want the kernel layout to be constrained by something as
esoteric as kcore. Let's fix kcore for good.
--david
^ permalink raw reply [flat|nested] 60+ messages in thread* RE: [Linux-ia64] RE: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (54 preceding siblings ...)
2003-05-19 18:39 ` David Mosberger
@ 2003-05-19 19:07 ` Luck, Tony
2003-05-28 19:10 ` Luck, Tony
` (2 subsequent siblings)
58 siblings, 0 replies; 60+ messages in thread
From: Luck, Tony @ 2003-05-19 19:07 UTC (permalink / raw)
To: linux-ia64
> I don't want the kernel layout to be constrained by something as
> esoteric as kcore. Let's fix kcore for good.
I like the vmlist solution because it is extensible (e.g. when
I add another address range to the kernel map to split text and
data so that I can replicate the text across nodes, I'll just
add a new entry to vmlist, and the kcore.c code won't be changed
at all). But as you point out, this also puts some limits on
kernel layout as objects on the vmlist have to be bounded by
VMALLOC_START and VMALLOC_END.
To remove this constraint, we either:
1) need a parallel list to keep tabs on interesting objects
that are not in vmlist.
2) need to remove the restriction that objects on vmlist are
in the [VMALLOC_START, VMALLOC_END) address range.
Answer #1 seems like overkill since we only have one object for
the list now, with a tentative plan for a second when kernel text
replication gets implemented. It also ends up duplicating all the
code in kcore.c that walks vmlist.
Answer #2 would end up with more extensive changes to fs/proc/kcore.c
and also changes to another generic file (mm/vmalloc.c) to make sure
that the out-of-range items on vmlist didn't confuse vmalloc().
I think that #2 looks less messy ... unless someone comes up
with answer #3
-Tony
^ permalink raw reply [flat|nested] 60+ messages in thread* RE: [Linux-ia64] RE: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (55 preceding siblings ...)
2003-05-19 19:07 ` Luck, Tony
@ 2003-05-28 19:10 ` Luck, Tony
2003-05-28 20:05 ` Luck, Tony
2003-05-28 20:13 ` Luck, Tony
58 siblings, 0 replies; 60+ messages in thread
From: Luck, Tony @ 2003-05-28 19:10 UTC (permalink / raw)
To: linux-ia64
[-- Attachment #1: Type: text/plain, Size: 368 bytes --]
This patch provides a more general mechanism for
replacing a virtual address with physical in a
movl reg=object
instruction. The two existing instances in ivt.S
where the address of swapper_pg_dir is loaded are
converted to use the new mechanism.
More instances are going to be needed in the MCA
code, where it may not be safe to use "tpa" instruction.
-Tony
[-- Attachment #2: reloc.patch --]
[-- Type: application/octet-stream, Size: 5652 bytes --]
diff -ru l2569-mosberger/arch/ia64/kernel/ivt.S l2569-reloc/arch/ia64/kernel/ivt.S
--- l2569-mosberger/arch/ia64/kernel/ivt.S Mon May 19 09:40:04 2003
+++ l2569-reloc/arch/ia64/kernel/ivt.S Wed May 28 09:37:02 2003
@@ -122,12 +122,9 @@
shr.u r18=r22,PGDIR_SHIFT // get bits 33-63 of the faulting address
;;
(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place
- .global ia64_ivt_patch1
-ia64_ivt_patch1:
-{ .mlx // we patch this bundle to include physical address of swapper_pg_dir
- srlz.d // ensure "rsm psr.dt" has taken effect
-(p6) movl r19=swapper_pg_dir // region 5 is rooted at swapper_pg_dir
-}
+
+ LOAD_PHYSICAL(srlz.d, p6, r19, swapper_pg_dir) // region 5 is rooted at swapper_pg_dir
+
.pred.rel "mutex", p6, p7
(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT
(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3
@@ -420,12 +417,9 @@
shr.u r18=r16,PGDIR_SHIFT // get bits 33-63 of faulting address
;;
(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place
- .global ia64_ivt_patch2
-ia64_ivt_patch2:
-{ .mlx // we patch this bundle to include physical address of swapper_pg_dir
- srlz.d // ensure "rsm psr.dt" has taken effect
-(p6) movl r19=swapper_pg_dir // region 5 is rooted at swapper_pg_dir
-}
+
+ LOAD_PHYSICAL(srlz.d, p6, r19, swapper_pg_dir) // region 5 is rooted at swapper_pg_dir
+
.pred.rel "mutex", p6, p7
(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT
(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3
diff -ru l2569-mosberger/arch/ia64/kernel/setup.c l2569-reloc/arch/ia64/kernel/setup.c
--- l2569-mosberger/arch/ia64/kernel/setup.c Wed May 28 10:53:48 2003
+++ l2569-reloc/arch/ia64/kernel/setup.c Wed May 28 12:02:43 2003
@@ -364,37 +364,35 @@
* xxxxAFFFFFFFFFEEEEEDxGGGGGGGxxxxxxxxxxxxxBBBBBBBBBBBBBBBBBBBBBBB
*/
static void __init
-patch_ivt_with_phys_swapper_pg_dir(void)
+patch_physical(void)
{
- extern char ia64_ivt_patch1[], ia64_ivt_patch2[];
- unsigned long spd = ia64_tpa((__u64)swapper_pg_dir);
- unsigned long *p;
-
- p = (unsigned long *)ia64_imva(ia64_ivt_patch1);
-
- *p = (*p & 0x3fffffffffffUL) |
- ((spd & 0x000000ffffc00000UL)<<24);
- p++;
- *p = (*p & 0xf000080fff800000UL) |
- ((spd & 0x8000000000000000UL) >> 4) |
- ((spd & 0x7fffff0000000000UL) >> 40) |
- ((spd & 0x00000000001f0000UL) << 29) |
- ((spd & 0x0000000000200000UL) << 23) |
- ((spd & 0x000000000000ff80UL) << 43) |
- ((spd & 0x000000000000007fUL) << 36);
-
- p = (unsigned long *)ia64_imva(ia64_ivt_patch2);
-
- *p = (*p & 0x3fffffffffffUL) |
- ((spd & 0x000000ffffc00000UL)<<24);
- p++;
- *p = (*p & 0xf000080fff800000UL) |
- ((spd & 0x8000000000000000UL) >> 4) |
- ((spd & 0x7fffff0000000000UL) >> 40) |
- ((spd & 0x00000000001f0000UL) << 29) |
- ((spd & 0x0000000000200000UL) << 23) |
- ((spd & 0x000000000000ff80UL) << 43) |
- ((spd & 0x000000000000007fUL) << 36);
+ extern unsigned long *__start___vtop_patchlist[], *__end____vtop_patchlist[];
+ unsigned long **e, *p, paddr, vaddr;
+
+ for (e = __start___vtop_patchlist; e < __end____vtop_patchlist; e++) {
+ p = *e;
+
+ vaddr = ((p[1] & 0x0800000000000000UL) << 4) | /*A*/
+ ((p[1] & 0x00000000007fffffUL) << 40) | /*B*/
+ ((p[0] & 0xffffc00000000000UL) >> 24) | /*C*/
+ ((p[1] & 0x0000100000000000UL) >> 23) | /*D*/
+ ((p[1] & 0x0003e00000000000UL) >> 29) | /*E*/
+ ((p[1] & 0x07fc000000000000UL) >> 43) | /*F*/
+ ((p[1] & 0x000007f000000000UL) >> 36); /*G*/
+
+ paddr = ia64_tpa(vaddr);
+
+ *p = (*p & 0x3fffffffffffUL) |
+ ((paddr & 0x000000ffffc00000UL)<<24); /*C*/
+ p++;
+ *p = (*p & 0xf000080fff800000UL) |
+ ((paddr & 0x8000000000000000UL) >> 4) | /*A*/
+ ((paddr & 0x7fffff0000000000UL) >> 40) | /*B*/
+ ((paddr & 0x0000000000200000UL) << 23) | /*D*/
+ ((paddr & 0x00000000001f0000UL) << 29) | /*E*/
+ ((paddr & 0x000000000000ff80UL) << 43) | /*F*/
+ ((paddr & 0x000000000000007fUL) << 36); /*G*/
+ }
}
@@ -406,7 +404,7 @@
unw_init();
- patch_ivt_with_phys_swapper_pg_dir();
+ patch_physical();
*cmdline_p = __va(ia64_boot_param->command_line);
strncpy(saved_command_line, *cmdline_p, sizeof(saved_command_line));
diff -ru l2569-mosberger/arch/ia64/vmlinux.lds.S l2569-reloc/arch/ia64/vmlinux.lds.S
--- l2569-mosberger/arch/ia64/vmlinux.lds.S Mon May 19 09:40:03 2003
+++ l2569-reloc/arch/ia64/vmlinux.lds.S Wed May 28 09:52:44 2003
@@ -55,6 +55,13 @@
__stop___ex_table = .;
}
+ __vtop_patchlist : AT(ADDR(__vtop_patchlist) - LOAD_OFFSET)
+ {
+ __start___vtop_patchlist = .;
+ *(__vtop_patchlist)
+ __end____vtop_patchlist = .;
+ }
+
__mckinley_e9_bundles : AT(ADDR(__mckinley_e9_bundles) - LOAD_OFFSET)
{
__start___mckinley_e9_bundles = .;
diff -ru l2569-mosberger/include/asm-ia64/asmmacro.h l2569-reloc/include/asm-ia64/asmmacro.h
--- l2569-mosberger/include/asm-ia64/asmmacro.h Mon May 19 09:41:01 2003
+++ l2569-reloc/include/asm-ia64/asmmacro.h Wed May 28 09:48:20 2003
@@ -51,6 +51,22 @@
[99:] x
/*
+ * Mark instructions that need a load of a virtual address patched to be
+ * a load of a physical address. We use this either in a performance-critical
+ * path (ivt.S - TLB miss processing) or in places where it might not be
+ * safe to use a "tpa" instruction (mca_asm.S - error recovery).
+ */
+ .section "__vtop_patchlist", "a" // declare section & section attributes
+ .previous
+
+#define LOAD_PHYSICAL(op, preg, reg, obj) \
+1: { .mlx; \
+ op; \
+(preg) movl reg = obj; \
+ }; \
+ .xdata8 "__vtop_patchlist", 1b
+
+/*
* For now, we always put in the McKinley E9 workaround. On CPUs that don't need it,
* we'll patch out the work-around bundles with NOPs, so their impact is minimal.
*/
^ permalink raw reply [flat|nested] 60+ messages in thread* RE: [Linux-ia64] RE: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (56 preceding siblings ...)
2003-05-28 19:10 ` Luck, Tony
@ 2003-05-28 20:05 ` Luck, Tony
2003-05-28 20:13 ` Luck, Tony
58 siblings, 0 replies; 60+ messages in thread
From: Luck, Tony @ 2003-05-28 20:05 UTC (permalink / raw)
To: linux-ia64
[-- Attachment #1: Type: text/plain, Size: 77 bytes --]
Patch in previous post is incomplete ... here's the
full version.
-Tony
[-- Attachment #2: reloc.patch --]
[-- Type: application/octet-stream, Size: 6941 bytes --]
diff -ru l2569-mosberger/arch/ia64/kernel/ivt.S l2569-reloc/arch/ia64/kernel/ivt.S
--- l2569-mosberger/arch/ia64/kernel/ivt.S Mon May 19 09:40:04 2003
+++ l2569-reloc/arch/ia64/kernel/ivt.S Wed May 28 09:37:02 2003
@@ -122,12 +122,9 @@
shr.u r18=r22,PGDIR_SHIFT // get bits 33-63 of the faulting address
;;
(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place
- .global ia64_ivt_patch1
-ia64_ivt_patch1:
-{ .mlx // we patch this bundle to include physical address of swapper_pg_dir
- srlz.d // ensure "rsm psr.dt" has taken effect
-(p6) movl r19=swapper_pg_dir // region 5 is rooted at swapper_pg_dir
-}
+
+ LOAD_PHYSICAL(srlz.d, p6, r19, swapper_pg_dir) // region 5 is rooted at swapper_pg_dir
+
.pred.rel "mutex", p6, p7
(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT
(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3
@@ -420,12 +417,9 @@
shr.u r18=r16,PGDIR_SHIFT // get bits 33-63 of faulting address
;;
(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place
- .global ia64_ivt_patch2
-ia64_ivt_patch2:
-{ .mlx // we patch this bundle to include physical address of swapper_pg_dir
- srlz.d // ensure "rsm psr.dt" has taken effect
-(p6) movl r19=swapper_pg_dir // region 5 is rooted at swapper_pg_dir
-}
+
+ LOAD_PHYSICAL(srlz.d, p6, r19, swapper_pg_dir) // region 5 is rooted at swapper_pg_dir
+
.pred.rel "mutex", p6, p7
(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT
(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3
diff -ru l2569-mosberger/arch/ia64/kernel/setup.c l2569-reloc/arch/ia64/kernel/setup.c
--- l2569-mosberger/arch/ia64/kernel/setup.c Wed May 28 13:03:29 2003
+++ l2569-reloc/arch/ia64/kernel/setup.c Wed May 28 12:02:43 2003
@@ -348,43 +348,51 @@
}
/*
- * There are two places in the performance critical path of
- * the exception handling code where we need to know the physical
- * address of the swapper_pg_dir structure. This routine
- * patches the "movl" instructions to load the value needed.
+ * We sometimes need to load the physical address of a kernel
+ * object. Often we can convert the virtual address to physical
+ * at execution time, but sometimes (either for performance reasons
+ * or during error recovery) we cannot do this. Patch the marked
+ * bundles to load the physical address.
+ * The 64-bit value in a "movl reg=value" is scattered between the
+ * two words of the bundle like this:
+ *
+ * 6  6         5         4         3         2         1
+ * 3210987654321098765432109876543210987654321098765432109876543210
+ * ABBBBBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCDEEEEEFFFFFFFFFGGGGGGG
+ *
+ * CCCCCCCCCCCCCCCCCCxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+ * xxxxAFFFFFFFFFEEEEEDxGGGGGGGxxxxxxxxxxxxxBBBBBBBBBBBBBBBBBBBBBBB
*/
static void __init
-patch_ivt_with_phys_swapper_pg_dir(void)
+patch_physical(void)
{
- extern char ia64_ivt_patch1[], ia64_ivt_patch2[];
- unsigned long spd = ia64_tpa((__u64)swapper_pg_dir);
- unsigned long *p;
-
- p = (unsigned long *)ia64_imva(ia64_ivt_patch1);
-
- *p = (*p & 0x3fffffffffffUL) |
- ((spd & 0x000000ffffc00000UL)<<24);
- p++;
- *p = (*p & 0xf000080fff800000UL) |
- ((spd & 0x8000000000000000UL) >> 4) |
- ((spd & 0x7fffff0000000000UL) >> 40) |
- ((spd & 0x00000000001f0000UL) << 29) |
- ((spd & 0x0000000000200000UL) << 23) |
- ((spd & 0x000000000000ff80UL) << 43) |
- ((spd & 0x000000000000007fUL) << 36);
-
- p = (unsigned long *)ia64_imva(ia64_ivt_patch2);
-
- *p = (*p & 0x3fffffffffffUL) |
- ((spd & 0x000000ffffc00000UL)<<24);
- p++;
- *p = (*p & 0xf000080fff800000UL) |
- ((spd & 0x8000000000000000UL) >> 4) |
- ((spd & 0x7fffff0000000000UL) >> 40) |
- ((spd & 0x00000000001f0000UL) << 29) |
- ((spd & 0x0000000000200000UL) << 23) |
- ((spd & 0x000000000000ff80UL) << 43) |
- ((spd & 0x000000000000007fUL) << 36);
+ extern unsigned long *__start___vtop_patchlist[], *__end____vtop_patchlist[];
+ unsigned long **e, *p, paddr, vaddr;
+
+ for (e = __start___vtop_patchlist; e < __end____vtop_patchlist; e++) {
+ p = *e;
+
+ vaddr = ((p[1] & 0x0800000000000000UL) << 4) | /*A*/
+ ((p[1] & 0x00000000007fffffUL) << 40) | /*B*/
+ ((p[0] & 0xffffc00000000000UL) >> 24) | /*C*/
+ ((p[1] & 0x0000100000000000UL) >> 23) | /*D*/
+ ((p[1] & 0x0003e00000000000UL) >> 29) | /*E*/
+ ((p[1] & 0x07fc000000000000UL) >> 43) | /*F*/
+ ((p[1] & 0x000007f000000000UL) >> 36); /*G*/
+
+ paddr = ia64_tpa(vaddr);
+
+ *p = (*p & 0x3fffffffffffUL) |
+ ((paddr & 0x000000ffffc00000UL)<<24); /*C*/
+ p++;
+ *p = (*p & 0xf000080fff800000UL) |
+ ((paddr & 0x8000000000000000UL) >> 4) | /*A*/
+ ((paddr & 0x7fffff0000000000UL) >> 40) | /*B*/
+ ((paddr & 0x0000000000200000UL) << 23) | /*D*/
+ ((paddr & 0x00000000001f0000UL) << 29) | /*E*/
+ ((paddr & 0x000000000000ff80UL) << 43) | /*F*/
+ ((paddr & 0x000000000000007fUL) << 36); /*G*/
+ }
}
@@ -396,7 +404,7 @@
unw_init();
- patch_ivt_with_phys_swapper_pg_dir();
+ patch_physical();
*cmdline_p = __va(ia64_boot_param->command_line);
strncpy(saved_command_line, *cmdline_p, sizeof(saved_command_line));
@@ -779,8 +787,6 @@
/* Clear the stack memory reserved for pt_regs: */
memset(ia64_task_regs(current), 0, sizeof(struct pt_regs));
- ia64_set_kr(IA64_KR_FPU_OWNER, 0);
-
/*
* Initialize default control register to defer all speculative faults. The
* kernel MUST NOT depend on a particular setting of these bits (in other words,
diff -ru l2569-mosberger/arch/ia64/vmlinux.lds.S l2569-reloc/arch/ia64/vmlinux.lds.S
--- l2569-mosberger/arch/ia64/vmlinux.lds.S Mon May 19 09:40:03 2003
+++ l2569-reloc/arch/ia64/vmlinux.lds.S Wed May 28 09:52:44 2003
@@ -55,6 +55,13 @@
__stop___ex_table = .;
}
+ __vtop_patchlist : AT(ADDR(__vtop_patchlist) - LOAD_OFFSET)
+ {
+ __start___vtop_patchlist = .;
+ *(__vtop_patchlist)
+ __end____vtop_patchlist = .;
+ }
+
__mckinley_e9_bundles : AT(ADDR(__mckinley_e9_bundles) - LOAD_OFFSET)
{
__start___mckinley_e9_bundles = .;
diff -ru l2569-mosberger/include/asm-ia64/asmmacro.h l2569-reloc/include/asm-ia64/asmmacro.h
--- l2569-mosberger/include/asm-ia64/asmmacro.h Mon May 19 09:41:01 2003
+++ l2569-reloc/include/asm-ia64/asmmacro.h Wed May 28 09:48:20 2003
@@ -51,6 +51,22 @@
[99:] x
/*
+ * Mark instructions that need a load of a virtual address patched to be
+ * a load of a physical address. We use this either in a performance-critical
+ * path (ivt.S - TLB miss processing) or in places where it might not be
+ * safe to use a "tpa" instruction (mca_asm.S - error recovery).
+ */
+ .section "__vtop_patchlist", "a" // declare section & section attributes
+ .previous
+
+#define LOAD_PHYSICAL(op, preg, reg, obj) \
+1: { .mlx; \
+ op; \
+(preg) movl reg = obj; \
+ }; \
+ .xdata8 "__vtop_patchlist", 1b
+
+/*
* For now, we always put in the McKinley E9 workaround. On CPUs that don't need it,
* we'll patch out the work-around bundles with NOPs, so their impact is minimal.
*/
^ permalink raw reply [flat|nested] 60+ messages in thread* RE: [Linux-ia64] RE: [PATCH] head.S fix for unusual load addrs
2003-04-17 23:05 [Linux-ia64] Re: [PATCH] head.S fix for unusual load addrs David Mosberger
` (57 preceding siblings ...)
2003-05-28 20:05 ` Luck, Tony
@ 2003-05-28 20:13 ` Luck, Tony
58 siblings, 0 replies; 60+ messages in thread
From: Luck, Tony @ 2003-05-28 20:13 UTC (permalink / raw)
To: linux-ia64
Aaargh! Still not right. That includes a spurious
hunk that backs out part of the FP fix. Don't apply
this bit!
@@ -779,8 +787,6 @@
/* Clear the stack memory reserved for pt_regs: */
memset(ia64_task_regs(current), 0, sizeof(struct pt_regs));
- ia64_set_kr(IA64_KR_FPU_OWNER, 0);
-
/*
* Initialize default control register to defer all speculative faults. The
* kernel MUST NOT depend on a particular setting of these bits (in other words,
-Tony
^ permalink raw reply [flat|nested] 60+ messages in thread