Here is a patch to fix lfetches that access memory beyond the end of a page, which can have the side effect of generating an MCA (machine check abort) if the physical memory does not exist. For example, copy_page will try to access video memory for the page that ends at 640K, and this can cause an MCA on some platforms. Some of the Bigsur UP boot failures were root-caused to this problem. Both copy_page and clear_page are very performance-critical functions and are tuned for both Itanium and McKinley. The patch is implemented using unused slots (nops) and has no performance impact. For kernels 2.4.18 and higher you will just need the patch for copy_page(); clear_page and the McKinley-optimized copy_page have already been taken care of. --- linux-2.4.16-akm/arch/ia64/lib/copy_page.S Fri Nov 9 14:26:17 2001 +++ linux/arch/ia64/lib/copy_page.S Wed Apr 10 17:38:07 2002 @@ -30,6 +30,7 @@ #define tgt2 r23 #define srcf r24 #define tgtf r25 +#define tgt_last r26 #define Nrot ((8*PIPE_DEPTH+7)&~7) @@ -55,18 +56,21 @@ mov src1=in1 adds src2=8,in1 + mov tgt_last = PAGE_SIZE ;; adds tgt2=8,in0 add srcf=512,in1 mov ar.lc=lcount mov tgt1=in0 add tgtf=512,in0 + add tgt_last = tgt_last, in0 ;; 1: (p[0]) ld8 t1[0]=[src1],16 (EPI) st8 [tgt1]=t1[PIPE_DEPTH-1],16 (p[0]) ld8 t2[0]=[src2],16 (EPI) st8 [tgt2]=t2[PIPE_DEPTH-1],16 + cmp.ltu p6,p0 = tgtf, tgt_last ;; (p[0]) ld8 t3[0]=[src1],16 (EPI) st8 [tgt1]=t3[PIPE_DEPTH-1],16 @@ -83,8 +87,8 @@ (p[0]) ld8 t8[0]=[src2],16 (EPI) st8 [tgt2]=t8[PIPE_DEPTH-1],16 - lfetch [srcf], 64 - lfetch [tgtf], 64 +(p6) lfetch [srcf], 64 +(p6) lfetch [tgtf], 64 br.ctop.sptk.few 1b ;; mov pr=saved_pr,0xffffffffffff0000 // restore predicates For kernels below 2.4.18, you will also need the patch for clear_page.
--- linux-2.4.16-akm/arch/ia64/lib/clear_page.S Fri Nov 9 14:26:17 2001 +++ linux/arch/ia64/lib/clear_page.S Wed Apr 10 17:32:26 2002 @@ -23,15 +23,18 @@ #define dst2 r9 #define dst3 r10 #define dst_fetch r11 +#define dst_last r14 GLOBAL_ENTRY(clear_page) .prologue .regstk 1,0,0,0 mov r16 = PAGE_SIZE/64-1 // -1 = repeat/until + mov r17 = PAGE_SIZE ;; .save ar.lc, saved_lc mov saved_lc = ar.lc .body + add dst_last = r17, dst0 mov ar.lc = r16 adds dst1 = 16, dst0 adds dst2 = 32, dst0 @@ -40,10 +43,12 @@ ;; 1: stf.spill.nta [dst0] = f0, 64 stf.spill.nta [dst1] = f0, 64 + cmp.ltu p6,p0 = dst_fetch, dst_last stf.spill.nta [dst2] = f0, 64 stf.spill.nta [dst3] = f0, 64 + ;; - lfetch [dst_fetch], 64 +(p6) lfetch [dst_fetch], 64 br.cloop.dptk.few 1b ;; mov ar.lc = r2 // restore lc Thanks, Asit