public inbox for linux-ia64@vger.kernel.org
 help / color / mirror / Atom feed
* [Linux-ia64] unalinged access by loadpair instruction
@ 2002-12-09 14:28 Hideki Yamamoto
  2002-12-09 21:41 ` David Mosberger
                   ` (5 more replies)
  0 siblings, 6 replies; 7+ messages in thread
From: Hideki Yamamoto @ 2002-12-09 14:28 UTC (permalink / raw)
  To: linux-ia64

 Hi everyone,

 When an unaligned access is caused by a load-pair
 instruction, the reload performed by the kernel is wrong.
 
 When the unaligned access is caused by a load-pair
 instruction in a NORMAL loop, there is no problem.
 Please try running the attached programs (c.c and a.s)
 compiled with gcc.

 However, when it is caused by a load-pair instruction in a
 software-pipelined (SWP) loop, the kernel fails to reload the data.
 Please try running the attached programs (c.c and aa.s)
 compiled with gcc. aa.s contains the load-pair in an SWP loop.
 If the failure does not appear, please change the n
 parameter to a bigger number.

 Sorry, I could not determine whether it is caused
 by a HW bug or a kernel problem.

 Please let me know if somebody understands why this happens.

 Thanks.

-- c.c
/*
 * c.c - driver for the load-pair unaligned-access test.
 *
 * Fills d[] with 123.0, has the assembly routine copy_by_loadpair()
 * (a.s or aa.s) copy it into d2[], and reports every element of d2[]
 * that does not hold 123.0 afterwards.
 */
#include <stdio.h>
#include <stdlib.h>

#define n 100

double d[n], d2[n + 1];

/*
 * Assembly routine under test.  The listings read the count from the third
 * argument register (r34); it is declared long so that the whole 64-bit
 * register is defined by the caller.
 */
extern void copy_by_loadpair(void *src, void *dst, long count);

int main(void) {
    int i;

    for( i = 0; i < n; i++ ) {
        d[i] = 123.0;
        d2[i] = 0.0;
    }
    /* verify that the source array really holds the expected value */
    for( i = 0; i < n; i++ ) {
        if( d[i] != 123.0 ) {
            printf("Assignment to d[%d] is wrong\n", i);
            exit(-1);
        }
    }
    printf("Verify is ok\n");
    copy_by_loadpair(&d, &d2, n);
    /* on a mismatch also print the neighbouring element (d2 has n+1 entries) */
    for( i = 0; i < n; i++ ) {
        if( d2[i] != 123.0 ) {
            printf("Something is wrong!!\n d2[%d] = %f(should be d[%d]=%f)\n", i, d2[i], i, d[i]);
            printf("%f\n", d2[i+1]);
        }
    }
    return 0;
}

-- a.s 
	.file	"a.c"
	.pred.safe_across_calls p1-p5,p16-p63
.text
	.align 16
	.global copy_by_loadpair#
	.proc copy_by_loadpair#
// copy_by_loadpair(src, dst, count): copy doubles with a load-pair in a
// plain (not software-pipelined) loop.
//   r32 = src pointer, r33 = dst pointer, r34 = count (compared with cmp4)
//   r17 = pass counter, starts at 1; the loop repeats while r17 != r34
//   r2  = address of the second destination double (r33 + 8)
// Each pass loads two adjacent doubles with one ldfpd and stores both,
// but src and dst advance by only 8 bytes, so successive ldfpd addresses
// are 8 bytes apart (presumably to provoke the unaligned accesses under
// test -- ldfpd normally expects a 16-byte aligned address).
copy_by_loadpair:
 {   .mmi
        alloc   r8=ar.pfs,3,0,0,0 ;;                     //0:  1   19
        nop.m   0
        add     r17=1,r0                                 // r17 = 1
 }
L1:
 {   .mii
        ldfpd   f6,f7=[r32]                                 //1:  2    4
        add     r2=8,r33                                 //1:  3    8
        nop.i   0
 }
 {   .mmi
        nop.m   0 ;;                              //1:  3    6
        stfd    [r33]=f6                          // first double of the pair
        add     r33=8,r33
 }
 {   .mmi
        nop.m   0 ;;                               //10:  3    7
        stfd    [r2]=f7                                //19:  3    9
        add     r32=8,r32;;
 }
 {   .mib
        cmp4.ne p8,p0=r17,r34                            // p8 = (r17 != count)
        add     r17=1,r17
  (p8)  br.cond.dpnt.many       L1 ;;                 //7:  3   19
 }
 {   .mib
        nop.m   0
        nop.i   0
        br.ret.sptk.many        b0 ;;                    //19:  4   10
 }
	.endp copy_by_loadpair#
-- aa.s
	.file	"a.c"
	.pred.safe_across_calls p1-p5,p16-p63
.text
	.align 16
	.global copy_by_loadpair#
	.proc copy_by_loadpair#
// copy_by_loadpair(src, dst, count): the same copy written as a
// software-pipelined (br.ctop) loop.
//   r32 = src, r33 = dst, r34 = value moved into ar.lc
//   r15 = running src pointer; r2 / r3 = running dst pointers (dst, dst+8)
//   p16 gates the load stage, p20 gates the store stage (ar.ec = 5)
// NOTE: as pointed out in the follow-up in this thread, the loop body is
// itself incorrect: br.ctop renames the rotating registers by one
// position per pass, while ldfpd writes two adjacent rotating registers
// (f32,f33), so a later load overwrites a value that has not been stored
// yet.  It is kept as posted because it is the reproducer for the kernel
// problem.
copy_by_loadpair:
 {   .mmi
        alloc   r8=ar.pfs,3,6,0,0 ;;
        nop.m   0
        add     r17=1,r0
 }
 {   .mmi
        add     r15=0,r32                       // r15 = src
        add     r2=0,r33                        // r2  = dst
        add     r3=8,r33;;                      // r3  = dst + 8
 }
 {   .mii
        nop.m   0
        mov     ar.lc=r34
        nop.i   0 ;;
 }
 {   .mii
        nop.m   0
        mov     pr.rot=0x10000 ;;               // p16 = 1, other rotating predicates = 0
        mov     ar.ec=5 ;;
 }
L1:
 {   .mmi
  (p16) ldfpd   f32,f33=[r15]
  (p16) add     r15=8,r15
        nop.i   0
 }
 {   .mmb
  (p20) stfd    [r3]=f37,8
        //nop.m   0
  (p20) stfd    [r2]=f36,8
        br.ctop.sptk    L1
 }
 {   .mib
        nop.m   0
        nop.i   0
        br.ret.sptk.many        b0 ;;                    //19:  4   10
 }
	.endp copy_by_loadpair#


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Linux-ia64] unalinged access by loadpair instruction
  2002-12-09 14:28 [Linux-ia64] unalinged access by loadpair instruction Hideki Yamamoto
@ 2002-12-09 21:41 ` David Mosberger
  2002-12-09 22:59 ` Luck, Tony
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: David Mosberger @ 2002-12-09 21:41 UTC (permalink / raw)
  To: linux-ia64

It looks to me like getfpreg() and setfpreg() are pretty obviously
broken: they don't take rotation into account at all.  Seems this
was missed when the integer-side was fixed (by Tony, IIRC).  Anyone
want to take a shot at fixing this?

	--david

>>>>> On Mon, 09 Dec 2002 23:28:45 +0900, "Hideki Yamamoto" <hideki@hpc.bs1.fc.nec.co.jp> said:

  Hideki>  Hi everyone,

  Hideki>  When unaligned access is happened by loadpair instruction,
  Hideki> reloading by kernel is wrong.
 
  Hideki>  When unalined access is happeded by loadpair instruction in
  Hideki> NORMAL loop, it is no problem.  Please try to run the
  Hideki> attached programs(c.c and a.s) compiled by gcc.

  Hideki>  However, When it is happned by loadpair instruction in SWP
  Hideki> loop, it fails to reload the data.  Please try to run the
  Hideki> attached programs(c.c and aa.s) compiled by gcc. aa.s is
  Hideki> included loadpair in SWP loop.  If there is no happening,
  Hideki> please change the n parameter to bigger number.

  Hideki>  Sorry, I could not isolate whether or not it is caused by
  Hideki> HW bug or Kernel problem.

  Hideki>  Let me know somebody understood the cause why it is
  Hideki> happened.



^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE: [Linux-ia64] unalinged access by loadpair instruction
  2002-12-09 14:28 [Linux-ia64] unalinged access by loadpair instruction Hideki Yamamoto
  2002-12-09 21:41 ` David Mosberger
@ 2002-12-09 22:59 ` Luck, Tony
  2002-12-10  2:33 ` David Mosberger
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Luck, Tony @ 2002-12-09 22:59 UTC (permalink / raw)
  To: linux-ia64

You do recall correctly ... checking the archives, I posted a
patch on October 16, 2001.  I did fix the floating point case
back then too ... in fact my post claims to include the patch
for both integer and FP.  But I must have attached the wrong
patch file.  I've long since deleted all my 2.4.10 trees, so
I'd have to reconstruct from scratch (can't do it from memory,
those neurons have been re-assigned :-(

-Tony

-----Original Message-----
From: David Mosberger [mailto:davidm@napali.hpl.hp.com]
Sent: Monday, December 09, 2002 1:42 PM
To: Hideki Yamamoto
Cc: linux-ia64@linuxia64.org
Subject: Re: [Linux-ia64] unalinged access by loadpair instruction


It looks to me like getfpreg() and setfpreg() are pretty obviously
broken: they doesn't take rotation into account at all.  Seems this
was missed when the integer-side was fixed (by Tony, IIRC).  Anyone
want to take a shot at fixing this?

	--david

>>>>> On Mon, 09 Dec 2002 23:28:45 +0900, "Hideki Yamamoto" <hideki@hpc.bs1.fc.nec.co.jp> said:

  Hideki>  Hi everyone,

  Hideki>  When unaligned access is happened by loadpair instruction,
  Hideki> reloading by kernel is wrong.
 
  Hideki>  When unalined access is happeded by loadpair instruction in
  Hideki> NORMAL loop, it is no problem.  Please try to run the
  Hideki> attached programs(c.c and a.s) compiled by gcc.

  Hideki>  However, When it is happned by loadpair instruction in SWP
  Hideki> loop, it fails to reload the data.  Please try to run the
  Hideki> attached programs(c.c and aa.s) compiled by gcc. aa.s is
  Hideki> included loadpair in SWP loop.  If there is no happening,
  Hideki> please change the n parameter to bigger number.

  Hideki>  Sorry, I could not isolate whether or not it is caused by
  Hideki> HW bug or Kernel problem.

  Hideki>  Let me know somebody understood the cause why it is
  Hideki> happened.


_______________________________________________
Linux-IA64 mailing list
Linux-IA64@linuxia64.org
http://lists.linuxia64.org/lists/listinfo/linux-ia64


^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE: [Linux-ia64] unalinged access by loadpair instruction
  2002-12-09 14:28 [Linux-ia64] unalinged access by loadpair instruction Hideki Yamamoto
  2002-12-09 21:41 ` David Mosberger
  2002-12-09 22:59 ` Luck, Tony
@ 2002-12-10  2:33 ` David Mosberger
  2002-12-10 11:12 ` Hideki Yamamoto
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: David Mosberger @ 2002-12-10  2:33 UTC (permalink / raw)
  To: linux-ia64

>>>>> On Mon, 9 Dec 2002 14:59:30 -0800, "Luck, Tony" <tony.luck@intel.com> said:

  Tony> You do recall correctly ... checking the archives, I posted a
  Tony> patch on October 16, 2001.  I did fix the floating point case
  Tony> back then too ... in fact my post claims to include the patch
  Tony> for both integer and FP.  But I must have attached the wrong
  Tony> patch file.  I've long since deleted all my 2.4.10 trees, so
  Tony> I'd have to reconstruct from scratch (can't do it from memory,
  Tony> those neurons have been re-assigned :-(

OK, it looks like the fix is pretty straight-forward.  The patch below
_should_ work, though I haven't tested it extensively.

Hideki, can you try it out?  BTW: I think your test program is buggy.
The core-loop isn't right because br.ctop renames by one register
position, not two.  I attached a version of the test program which
does what you wanted.

	--david

===== arch/ia64/kernel/unaligned.c 1.6 vs edited =====
--- 1.6/arch/ia64/kernel/unaligned.c	Thu Mar 14 00:28:41 2002
+++ edited/arch/ia64/kernel/unaligned.c	Mon Dec  9 18:24:54 2002
@@ -486,7 +486,21 @@
 	DPRINT("*0x%lx=0x%lx NaT=%d new unat: %p=%lx\n", addr, val, nat, (void *) unat,*unat);
 }
 
-#define IA64_FPH_OFFS(r) (r - IA64_FIRST_ROTATING_FR)
+/*
+ * Return the (rotated) index for floating point register REGNUM (REGNUM must be in the
+ * range from 32-127, result is in the range from 0-95.
+ */
+static inline unsigned long
+fph_index (struct pt_regs *regs, long regnum)
+{
+	unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f;
+
+	regnum -= IA64_FIRST_ROTATING_FR;
+	regnum += rrb_fr;
+	if (regnum >= 96)
+		regnum -= 96;
+	return regnum;
+}
 
 static void
 setfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
@@ -507,7 +521,7 @@
 	 */
 	if (regnum >= IA64_FIRST_ROTATING_FR) {
 		ia64_sync_fph(current);
-		current->thread.fph[IA64_FPH_OFFS(regnum)] = *fpval;
+		current->thread.fph[fph_index(regs, regnum)] = *fpval;
 	} else {
 		/*
 		 * pt_regs or switch_stack ?
@@ -566,7 +580,7 @@
 	 */
 	if (regnum >= IA64_FIRST_ROTATING_FR) {
 		ia64_flush_fph(current);
-		*fpval = current->thread.fph[IA64_FPH_OFFS(regnum)];
+		*fpval = current->thread.fph[fph_index(regs, regnum)];
 	} else {
 		/*
 		 * f0 = 0.0, f1= 1.0. Those registers are constant and are thus
----------------------------------------------------
/*
 * Driver for the corrected load-pair test.  d[i] is set to i, so an
 * element that ends up in the wrong place is detected as well as one
 * that was never copied.
 */
#include <stdio.h>

#define n 100

double d[n],d2[n+1];

/*
 * Assembly routine under test.  It moves the third argument into ar.lc,
 * so the count is declared long to define the whole 64-bit register.
 */
extern void copy_by_loadpair(void *src, void *dst, long count);

int main(void) {
    int i;

    for (i = 0; i < n; i++)  {
	    d[i] = i;
	    d2[i] = 0.0;
    }
    /*
     * n/2-1: each pass of the assembly loop copies two doubles, and the
     * value is loaded into ar.lc (presumably "passes minus one").
     */
    copy_by_loadpair(&d, &d2, n/2-1);
    for (i = 0; i < n; i++) {
	    if (d2[i] != i)
		    printf("d2[%d] = %f, should be d[%d]=%f\n",
			   i, d2[i], i, d[i]);
    }
    return 0;
}

----------------------------------------------------
	.file	"a.c"
	.pred.safe_across_calls p1-p5,p16-p63
.text
	.align 16
	.global copy_by_loadpair
	.proc copy_by_loadpair
// copy_by_loadpair(src, dst, count): corrected software-pipelined copy.
//   r32 = src, r33 = dst, r34 = value moved into ar.lc
//   r15 = running src pointer, post-incremented by 16 per load
//   r2 / r3 = running dst pointers for the first / second double of a
//             pair, each post-incremented by 16 per store
// The pair is loaded into f32 and f37 (not f32,f33): br.ctop renames the
// rotating registers by one position per pass, so after the four
// rotations between stage p16 and stage p20 the two values are found in
// f36 and f41, and neither has been overwritten by a later load.
copy_by_loadpair:
        alloc   r8=ar.pfs,3,6,0,0 ;;
        mov     r15=r32
        mov     r2=r33
        add     r3=8,r33
        mov     ar.lc=r34
        mov     pr.rot=0x10000                  // p16 = 1, other rotating predicates = 0
        mov     ar.ec=5 ;;
L1:
  (p16) ldfpd   f32,f37=[r15],16
  (p20) stfd    [r2]=f36,16
  (p20) stfd    [r3]=f41,16
        br.ctop.sptk    L1;;
        br.ret.sptk.many        b0 ;;
	.endp copy_by_loadpair


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Linux-ia64] unalinged access by loadpair instruction
  2002-12-09 14:28 [Linux-ia64] unalinged access by loadpair instruction Hideki Yamamoto
                   ` (2 preceding siblings ...)
  2002-12-10  2:33 ` David Mosberger
@ 2002-12-10 11:12 ` Hideki Yamamoto
  2002-12-10 12:18 ` 
  2002-12-11  7:26 ` Hideki Yamamoto
  5 siblings, 0 replies; 7+ messages in thread
From: Hideki Yamamoto @ 2002-12-10 11:12 UTC (permalink / raw)
  To: linux-ia64

 Hi David,

>   Tony> I'd have to reconstruct from scratch (can't do it from memory,
>   Tony> those neurons have been re-assigned :-(
> OK, it looks like the fix is pretty straight-forward.  The patch below
> _should_ work, though I haven't tested it extensively.
> 
> Hideki, can you try it out?  BTW: I think your test program is buggy.

 OK, I will try running it on a kernel with the patch
 you sent applied.

> The core-loop isn't right because br.ctop renames by one register
> position, not two.  I attached a version of the test program which
> does what you wanted.

 Sorry, I do not understand why my program is buggy,
 even after looking at your program. The increment
 in my program is 8 bytes on purpose. :-)

 Thank you for sending the patch.

End of my email
--
Yours faithfully,
Hideki Yamamoto   (V).v.(V) # Empowered by Innovation

> 
> === arch/ia64/kernel/unaligned.c 1.6 vs edited ==> --- 1.6/arch/ia64/kernel/unaligned.c	Thu Mar 14 00:28:41 2002
> +++ edited/arch/ia64/kernel/unaligned.c	Mon Dec  9 18:24:54 2002
> @@ -486,7 +486,21 @@
>  	DPRINT("*0x%lx=0x%lx NaT=%d new unat: %p=%lx\n", addr, val, nat, (void *) unat,*unat);
>  }
>  
> -#define IA64_FPH_OFFS(r) (r - IA64_FIRST_ROTATING_FR)
> +/*
> + * Return the (rotated) index for floating point register REGNUM (REGNUM must be in the
> + * range from 32-127, result is in the range from 0-95.
> + */
> +static inline unsigned long
> +fph_index (struct pt_regs *regs, long regnum)
> +{
> +	unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f;
> +
> +	regnum -= IA64_FIRST_ROTATING_FR;
> +	regnum += rrb_fr;
> +	if (regnum >= 96)
> +		regnum -= 96;
> +	return regnum;
> +}
>  
>  static void
>  setfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
> @@ -507,7 +521,7 @@
>  	 */
>  	if (regnum >= IA64_FIRST_ROTATING_FR) {
>  		ia64_sync_fph(current);
> -		current->thread.fph[IA64_FPH_OFFS(regnum)] = *fpval;
> +		current->thread.fph[fph_index(regs, regnum)] = *fpval;
>  	} else {
>  		/*
>  		 * pt_regs or switch_stack ?
> @@ -566,7 +580,7 @@
>  	 */
>  	if (regnum >= IA64_FIRST_ROTATING_FR) {
>  		ia64_flush_fph(current);
> -		*fpval = current->thread.fph[IA64_FPH_OFFS(regnum)];
> +		*fpval = current->thread.fph[fph_index(regs, regnum)];
>  	} else {
>  		/*
>  		 * f0 = 0.0, f1= 1.0. Those registers are constant and are thus
> ----------------------------------------------------
> #define n 100
> 
> double d[n],d2[n+1];
> 
> main() {
>     int i,j;
> 
>     for (i = 0; i < n; i++)  {
> 	    d[i] = i;
> 	    d2[i] = 0.0;
>     }
>     copy_by_loadpair(&d, &d2, n/2-1);
>     for (i = 0; i < n; i++) {
> 	    if (d2[i] != i)
> 		    printf("d2[%d] = %f, should be d[%d]=%f\n",
> 			   i, d2[i], i, d[i]);
>     }
> }
> 
> ----------------------------------------------------
> 	.file	"a.c"
> 	.pred.safe_across_calls p1-p5,p16-p63
> .text
> 	.align 16
> 	.global copy_by_loadpair
> 	.proc copy_by_loadpair
> copy_by_loadpair:
>         alloc   r8=ar.pfs,3,6,0,0 ;;
>         mov     r15=r32
>         mov     r2=r33
>         add     r3=8,r33
>         mov     ar.lc=r34
>         mov     pr.rot=0x10000
>         mov     ar.ec=5 ;;
> L1:
>   (p16) ldfpd   f32,f37=[r15],16
>   (p20) stfd    [r2]ó6,16
>   (p20) stfd    [r3]ô1,16
>         br.ctop.sptk    L1;;
>         br.ret.sptk.many        b0 ;;
> 	.endp get_by_loadpair
> 
> _______________________________________________
> Linux-IA64 mailing list
> Linux-IA64@linuxia64.org
> http://lists.linuxia64.org/lists/listinfo/linux-ia64
> 


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Linux-ia64] unalinged access by loadpair instruction
  2002-12-09 14:28 [Linux-ia64] unalinged access by loadpair instruction Hideki Yamamoto
                   ` (3 preceding siblings ...)
  2002-12-10 11:12 ` Hideki Yamamoto
@ 2002-12-10 12:18 ` 
  2002-12-11  7:26 ` Hideki Yamamoto
  5 siblings, 0 replies; 7+ messages in thread
From:  @ 2002-12-10 12:18 UTC (permalink / raw)
  To: linux-ia64

 Hi David,

 Indeed, you are right. There is a mistake in my program.
 The register will be overwritten...

 Thank you.

End of my email
--
Yours faithfully,
Hideki Yamamoto   (V).v.(V) # Empowered by Innovation

> > The core-loop isn't right because br.ctop renames by one register
> > position, not two.  I attached a version of the test program which
> > does what you wanted.
>  Sorry, I did not understand why my program is buggy
>  even if I saw your program. So the incremental value
>  in my program is 8bytes, it means on purpose. :-)
> 
>  Thanks you for sending the patch.
> 
> End of my email
> --
> Yours faithfully,
> Hideki Yamamoto   (V).v.(V) # Empowered by Innovation
> 
> > 
> > === arch/ia64/kernel/unaligned.c 1.6 vs edited ==> > --- 1.6/arch/ia64/kernel/unaligned.c	Thu Mar 14 00:28:41 2002
> > +++ edited/arch/ia64/kernel/unaligned.c	Mon Dec  9 18:24:54 2002
> > @@ -486,7 +486,21 @@
> >  	DPRINT("*0x%lx=0x%lx NaT=%d new unat: %p=%lx\n", addr, val, nat, (void *) unat,*unat);
> >  }
> >  
> > -#define IA64_FPH_OFFS(r) (r - IA64_FIRST_ROTATING_FR)
> > +/*
> > + * Return the (rotated) index for floating point register REGNUM (REGNUM must be in the
> > + * range from 32-127, result is in the range from 0-95.
> > + */
> > +static inline unsigned long
> > +fph_index (struct pt_regs *regs, long regnum)
> > +{
> > +	unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f;
> > +
> > +	regnum -= IA64_FIRST_ROTATING_FR;
> > +	regnum += rrb_fr;
> > +	if (regnum >= 96)
> > +		regnum -= 96;
> > +	return regnum;
> > +}
> >  
> >  static void
> >  setfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
> > @@ -507,7 +521,7 @@
> >  	 */
> >  	if (regnum >= IA64_FIRST_ROTATING_FR) {
> >  		ia64_sync_fph(current);
> > -		current->thread.fph[IA64_FPH_OFFS(regnum)] = *fpval;
> > +		current->thread.fph[fph_index(regs, regnum)] = *fpval;
> >  	} else {
> >  		/*
> >  		 * pt_regs or switch_stack ?
> > @@ -566,7 +580,7 @@
> >  	 */
> >  	if (regnum >= IA64_FIRST_ROTATING_FR) {
> >  		ia64_flush_fph(current);
> > -		*fpval = current->thread.fph[IA64_FPH_OFFS(regnum)];
> > +		*fpval = current->thread.fph[fph_index(regs, regnum)];
> >  	} else {
> >  		/*
> >  		 * f0 = 0.0, f1= 1.0. Those registers are constant and are thus
> > ----------------------------------------------------
> > #define n 100
> > 
> > double d[n],d2[n+1];
> > 
> > main() {
> >     int i,j;
> > 
> >     for (i = 0; i < n; i++)  {
> > 	    d[i] = i;
> > 	    d2[i] = 0.0;
> >     }
> >     copy_by_loadpair(&d, &d2, n/2-1);
> >     for (i = 0; i < n; i++) {
> > 	    if (d2[i] != i)
> > 		    printf("d2[%d] = %f, should be d[%d]=%f\n",
> > 			   i, d2[i], i, d[i]);
> >     }
> > }
> > 
> > ----------------------------------------------------
> > 	.file	"a.c"
> > 	.pred.safe_across_calls p1-p5,p16-p63
> > .text
> > 	.align 16
> > 	.global copy_by_loadpair
> > 	.proc copy_by_loadpair
> > copy_by_loadpair:
> >         alloc   r8=ar.pfs,3,6,0,0 ;;
> >         mov     r15=r32
> >         mov     r2=r33
> >         add     r3=8,r33
> >         mov     ar.lc=r34
> >         mov     pr.rot=0x10000
> >         mov     ar.ec=5 ;;
> > L1:
> >   (p16) ldfpd   f32,f37=[r15],16
> >   (p20) stfd    [r2]ó6,16
> >   (p20) stfd    [r3]ô1,16
> >         br.ctop.sptk    L1;;
> >         br.ret.sptk.many        b0 ;;
> > 	.endp get_by_loadpair
> > 
> > _______________________________________________
> > Linux-IA64 mailing list
> > Linux-IA64@linuxia64.org
> > http://lists.linuxia64.org/lists/listinfo/linux-ia64
> > 
> 
> _______________________________________________
> Linux-IA64 mailing list
> Linux-IA64@linuxia64.org
> http://lists.linuxia64.org/lists/listinfo/linux-ia64
> 


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Linux-ia64] unalinged access by loadpair instruction
  2002-12-09 14:28 [Linux-ia64] unalinged access by loadpair instruction Hideki Yamamoto
                   ` (4 preceding siblings ...)
  2002-12-10 12:18 ` 
@ 2002-12-11  7:26 ` Hideki Yamamoto
  5 siblings, 0 replies; 7+ messages in thread
From: Hideki Yamamoto @ 2002-12-11  7:26 UTC (permalink / raw)
  To: linux-ia64

 Hi David,

 I have just tested a kernel with the patch you sent
 applied.
 It works fine.

 Thank you so much.

End of my email
--
Yours faithfully,
Hideki Yamamoto   (V).v.(V) # Empowered by Innovation

At Mon, 9 Dec 2002 18:33:38 -0800,
David Mosberger wrote:
> 
> >>>>> On Mon, 9 Dec 2002 14:59:30 -0800, "Luck, Tony" <tony.luck@intel.com> said:
> 
>   Tony> You do recall correctly ... checking the archives, I posted a
>   Tony> patch on October 16, 2001.  I did fix the floating point case
>   Tony> back then too ... in fact my post claims to include the patch
>   Tony> for both integer and FP.  But I must have attached the wrong
>   Tony> patch file.  I've long since deleted all my 2.4.10 trees, so
>   Tony> I'd have to reconstruct from scratch (can't do it from memory,
>   Tony> those neurons have been re-assigned :-(
> 
> OK, it looks like the fix is pretty straight-forward.  The patch below
> _should_ work, though I haven't tested it extensively.
> 
> Hideki, can you try it out?  BTW: I think your test program is buggy.
> The core-loop isn't right because br.ctop renames by one register
> position, not two.  I attached a version of the test program which
> does what you wanted.
> 
> 	--david
> 
> === arch/ia64/kernel/unaligned.c 1.6 vs edited ==> --- 1.6/arch/ia64/kernel/unaligned.c	Thu Mar 14 00:28:41 2002
> +++ edited/arch/ia64/kernel/unaligned.c	Mon Dec  9 18:24:54 2002
> @@ -486,7 +486,21 @@
>  	DPRINT("*0x%lx=0x%lx NaT=%d new unat: %p=%lx\n", addr, val, nat, (void *) unat,*unat);
>  }
>  
> -#define IA64_FPH_OFFS(r) (r - IA64_FIRST_ROTATING_FR)
> +/*
> + * Return the (rotated) index for floating point register REGNUM (REGNUM must be in the
> + * range from 32-127, result is in the range from 0-95.
> + */
> +static inline unsigned long
> +fph_index (struct pt_regs *regs, long regnum)
> +{
> +	unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f;
> +
> +	regnum -= IA64_FIRST_ROTATING_FR;
> +	regnum += rrb_fr;
> +	if (regnum >= 96)
> +		regnum -= 96;
> +	return regnum;
> +}
>  
>  static void
>  setfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
> @@ -507,7 +521,7 @@
>  	 */
>  	if (regnum >= IA64_FIRST_ROTATING_FR) {
>  		ia64_sync_fph(current);
> -		current->thread.fph[IA64_FPH_OFFS(regnum)] = *fpval;
> +		current->thread.fph[fph_index(regs, regnum)] = *fpval;
>  	} else {
>  		/*
>  		 * pt_regs or switch_stack ?
> @@ -566,7 +580,7 @@
>  	 */
>  	if (regnum >= IA64_FIRST_ROTATING_FR) {
>  		ia64_flush_fph(current);
> -		*fpval = current->thread.fph[IA64_FPH_OFFS(regnum)];
> +		*fpval = current->thread.fph[fph_index(regs, regnum)];
>  	} else {
>  		/*
>  		 * f0 = 0.0, f1= 1.0. Those registers are constant and are thus
> ----------------------------------------------------
> #define n 100
> 
> double d[n],d2[n+1];
> 
> main() {
>     int i,j;
> 
>     for (i = 0; i < n; i++)  {
> 	    d[i] = i;
> 	    d2[i] = 0.0;
>     }
>     copy_by_loadpair(&d, &d2, n/2-1);
>     for (i = 0; i < n; i++) {
> 	    if (d2[i] != i)
> 		    printf("d2[%d] = %f, should be d[%d]=%f\n",
> 			   i, d2[i], i, d[i]);
>     }
> }
> 
> ----------------------------------------------------
> 	.file	"a.c"
> 	.pred.safe_across_calls p1-p5,p16-p63
> .text
> 	.align 16
> 	.global copy_by_loadpair
> 	.proc copy_by_loadpair
> copy_by_loadpair:
>         alloc   r8=ar.pfs,3,6,0,0 ;;
>         mov     r15=r32
>         mov     r2=r33
>         add     r3=8,r33
>         mov     ar.lc=r34
>         mov     pr.rot=0x10000
>         mov     ar.ec=5 ;;
> L1:
>   (p16) ldfpd   f32,f37=[r15],16
>   (p20) stfd    [r2]ó6,16
>   (p20) stfd    [r3]ô1,16
>         br.ctop.sptk    L1;;
>         br.ret.sptk.many        b0 ;;
> 	.endp get_by_loadpair
> 
> _______________________________________________
> Linux-IA64 mailing list
> Linux-IA64@linuxia64.org
> http://lists.linuxia64.org/lists/listinfo/linux-ia64
> 


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2002-12-11  7:26 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2002-12-09 14:28 [Linux-ia64] unalinged access by loadpair instruction Hideki Yamamoto
2002-12-09 21:41 ` David Mosberger
2002-12-09 22:59 ` Luck, Tony
2002-12-10  2:33 ` David Mosberger
2002-12-10 11:12 ` Hideki Yamamoto
2002-12-10 12:18 ` 
2002-12-11  7:26 ` Hideki Yamamoto

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox