* [Qemu-devel] [PATCH] Updated Sparc support
@ 2003-05-13  4:13 David S. Miller
  2003-05-13 12:31 ` [Qemu-devel] " Fabrice Bellard
  2003-05-15 13:03 ` [Qemu-devel] [PATCH] Updated Sparc support Johan Rydberg
  0 siblings, 2 replies; 10+ messages in thread
From: David S. Miller @ 2003-05-13  4:13 UTC (permalink / raw)
  To: qemu-devel
Fabrice, here are updated patches for Sparc support.
With the mmap changes you installed today, things work
quite well :-)
Please do not miss the configure and Makefile changes, they
somehow disappeared last time you applied my Sparc changes :-)
The main generic change here is to support skipping both prologue and
epilogue during op-i386.h generation.  This is supported via
'start_offset' which the cpu code can advance past initial
instructions.
Also it was necessary to add support for outputting both epilogue
and prologue during code generation.
Like PPC, I now hold all the main x86 registers (EAX etc.) in Sparc
registers.  This was tricky to accomplish with register windows, but
finally it works...
The current state is that, for 32-bit Sparc, static binaries seem to
work and test-i386 passes except for 2 or 3 float tests which I will
fix shortly.  My plan is to fix the float tests, make sure 64-bit
sparc is working properly, then move on to playing with dynamically
loaded libraries and making other improvements.
Thanks.
--- ./dyngen.c.~1~	Mon May 12 18:05:54 2003
+++ ./dyngen.c	Mon May 12 18:17:20 2003
@@ -274,14 +274,20 @@ void gen_code(const char *name, host_ulo
 {
     int copy_size = 0;
     uint8_t *p_start, *p_end;
+    host_ulong start_offset;
     int nb_args, i, n;
     uint8_t args_present[MAX_ARGS];
     const char *sym_name, *p;
     ELF_RELOC *rel;
 
-    /* compute exact size excluding return instruction */
+    /* Compute exact size excluding prologue and epilogue instructions.
+     * Increment start_offset to skip prologue instructions, then compute
+     * copy_size to indicate the size of the remaining instructions (in
+     * bytes).
+     */
     p_start = text + offset;
     p_end = p_start + size;
+    start_offset = offset;
     switch(ELF_ARCH) {
     case EM_386:
         {
@@ -343,41 +349,63 @@ void gen_code(const char *name, host_ulo
     case EM_SPARC:
     case EM_SPARC32PLUS:
 	{
+	    uint32_t start_insn, end_insn1, end_insn2, skip_insn;
             uint8_t *p;
             p = (void *)(p_end - 8);
             if (p <= p_start)
                 error("empty code for %s", name);
-	    if (get32((uint32_t *)(p_start + 0x0)) != 0x9de3bf98)
-                error("save %%sp,-104,%%sp expected at the start of %s "
-		      "found [%08x]",
-		      name, get32((uint32_t *)(p_start + 0x0)));
-            if (get32((uint32_t *)(p + 0x0)) != 0x81c7e008 ||
-		get32((uint32_t *)(p + 0x4)) != 0x81e80000)
-                error("ret; restore; expected at the end of %s found [%08x:%08x]",
-		      name,
-		      get32((uint32_t *)(p + 0x0)),
-		      get32((uint32_t *)(p + 0x4)));
+	    start_insn = get32((uint32_t *)(p_start + 0x0));
+	    end_insn1 = get32((uint32_t *)(p + 0x0));
+	    end_insn2 = get32((uint32_t *)(p + 0x4));
+	    if ((start_insn & ~0x1fff) == 0x9de3a000) {
+		p_start += 0x4;
+		start_offset += 0x4;
+		if ((int)(start_insn | ~0x1fff) < -128)
+		    error("Found bogus save at the start of %s", name);
+		if (end_insn1 != 0x81c7e008 || end_insn2 != 0x81e80000)
+		    error("ret; restore; not found at end of %s", name);
+	    } else {
+		error("No save at the beginning of %s", name);
+	    }
+
+	    /* Skip a preceding nop, if present.  */
+	    if (p > p_start) {
+		skip_insn = get32((uint32_t *)(p - 0x4));
+		if (skip_insn == 0x01000000)
+		    p -= 4;
+	    }
 
             copy_size = p - p_start;
 	}
 	break;
     case EM_SPARCV9:
 	{
+	    uint32_t start_insn, end_insn1, end_insn2, skip_insn;
             uint8_t *p;
             p = (void *)(p_end - 8);
             if (p <= p_start)
                 error("empty code for %s", name);
-	    if (get32((uint32_t *)(p_start + 0x0)) != 0x9de3bf40)
-                error("save %%sp,-192,%%sp expected at the start of %s "
-		      "found [%08x]",
-		      name, get32((uint32_t *)(p_start + 0x0)));
-            if (get32((uint32_t *)(p + 0x0)) != 0x81cfe008 ||
-		get32((uint32_t *)(p + 0x4)) != 0x01000000)
-                error("rett %%i7+8; nop; expected at the end of %s "
-		      "found [%08x:%08x]",
-		      name,
-		      get32((uint32_t *)(p + 0x0)),
-		      get32((uint32_t *)(p + 0x4)));
+	    start_insn = get32((uint32_t *)(p_start + 0x0));
+	    end_insn1 = get32((uint32_t *)(p + 0x0));
+	    end_insn2 = get32((uint32_t *)(p + 0x4));
+	    if ((start_insn & ~0x1fff) == 0x9de3a000) {
+		p_start += 0x4;
+		start_offset += 0x4;
+		if ((int)(start_insn | ~0x1fff) < -256)
+		    error("Found bogus save at the start of %s", name);
+		if (end_insn1 != 0x81c7e008 || end_insn2 != 0x81e80000)
+		    error("ret; restore; not found at end of %s", name);
+	    } else {
+		error("No save at the beginning of %s", name);
+	    }
+
+	    /* Skip a preceding nop, if present.  */
+	    if (p > p_start) {
+		skip_insn = get32((uint32_t *)(p - 0x4));
+		if (skip_insn == 0x01000000)
+		    p -= 4;
+	    }
+
             copy_size = p - p_start;
 	}
 	break;
@@ -390,7 +418,8 @@ void gen_code(const char *name, host_ulo
         args_present[i] = 0;
 
     for(i = 0, rel = relocs;i < nb_relocs; i++, rel++) {
-        if (rel->r_offset >= offset && rel->r_offset < offset + copy_size) {
+        if (rel->r_offset >= start_offset &&
+	    rel->r_offset < start_offset + copy_size) {
             sym_name = strtab + symtab[ELFW(R_SYM)(rel->r_info)].st_name;
             if (strstart(sym_name, "__op_param", &p)) {
                 n = strtoul(p, NULL, 10);
@@ -427,7 +456,8 @@ void gen_code(const char *name, host_ulo
         fprintf(outfile, "    extern void %s();\n", name);
 
         for(i = 0, rel = relocs;i < nb_relocs; i++, rel++) {
-            if (rel->r_offset >= offset && rel->r_offset < offset + copy_size) {
+            if (rel->r_offset >= start_offset &&
+		rel->r_offset < start_offset + copy_size) {
                 sym_name = strtab + symtab[ELFW(R_SYM)(rel->r_info)].st_name;
                 if (*sym_name && !strstart(sym_name, "__op_param", &p)) {
 #if defined(HOST_SPARC)
@@ -443,7 +473,7 @@ void gen_code(const char *name, host_ulo
             }
         }
 
-        fprintf(outfile, "    memcpy(gen_code_ptr, &%s, %d);\n", name, copy_size);
+        fprintf(outfile, "    memcpy(gen_code_ptr, (void *)((char *)&%s+%d), %d);\n", name, start_offset - offset, copy_size);
         for(i = 0; i < nb_args; i++) {
             fprintf(outfile, "    param%d = *opparam_ptr++;\n", i + 1);
         }
@@ -455,7 +485,8 @@ void gen_code(const char *name, host_ulo
                 int type;
                 int addend;
                 for(i = 0, rel = relocs;i < nb_relocs; i++, rel++) {
-                if (rel->r_offset >= offset && rel->r_offset < offset + copy_size) {
+                if (rel->r_offset >= start_offset &&
+		    rel->r_offset < start_offset + copy_size) {
                     sym_name = strtab + symtab[ELFW(R_SYM)(rel->r_info)].st_name;
                     if (strstart(sym_name, "__op_param", &p)) {
                         snprintf(name, sizeof(name), "param%s", p);
@@ -467,11 +498,11 @@ void gen_code(const char *name, host_ulo
                     switch(type) {
                     case R_386_32:
                         fprintf(outfile, "    *(uint32_t *)(gen_code_ptr + %d) = %s + %d;\n", 
-                                rel->r_offset - offset, name, addend);
+                                rel->r_offset - start_offset, name, addend);
                         break;
                     case R_386_PC32:
                         fprintf(outfile, "    *(uint32_t *)(gen_code_ptr + %d) = %s - (long)(gen_code_ptr + %d) + %d;\n", 
-                                rel->r_offset - offset, name, rel->r_offset - offset, addend);
+                                rel->r_offset - start_offset, name, rel->r_offset - start_offset, addend);
                         break;
                     default:
                         error("unsupported i386 relocation (%d)", type);
@@ -485,7 +516,8 @@ void gen_code(const char *name, host_ulo
                 int type;
                 int addend;
                 for(i = 0, rel = relocs;i < nb_relocs; i++, rel++) {
-                    if (rel->r_offset >= offset && rel->r_offset < offset + copy_size) {
+                    if (rel->r_offset >= start_offset &&
+			rel->r_offset < start_offset + copy_size) {
                         sym_name = strtab + symtab[ELFW(R_SYM)(rel->r_info)].st_name;
                         if (strstart(sym_name, "__op_param", &p)) {
                             snprintf(name, sizeof(name), "param%s", p);
@@ -497,24 +529,24 @@ void gen_code(const char *name, host_ulo
                         switch(type) {
                         case R_PPC_ADDR32:
                             fprintf(outfile, "    *(uint32_t *)(gen_code_ptr + %d) = %s + %d;\n", 
-                                    rel->r_offset - offset, name, addend);
+                                    rel->r_offset - start_offset, name, addend);
                             break;
                         case R_PPC_ADDR16_LO:
                             fprintf(outfile, "    *(uint16_t *)(gen_code_ptr + %d) = (%s + %d);\n", 
-                                    rel->r_offset - offset, name, addend);
+                                    rel->r_offset - start_offset, name, addend);
                             break;
                         case R_PPC_ADDR16_HI:
                             fprintf(outfile, "    *(uint16_t *)(gen_code_ptr + %d) = (%s + %d) >> 16;\n", 
-                                    rel->r_offset - offset, name, addend);
+                                    rel->r_offset - start_offset, name, addend);
                             break;
                         case R_PPC_ADDR16_HA:
                             fprintf(outfile, "    *(uint16_t *)(gen_code_ptr + %d) = (%s + %d + 0x8000) >> 16;\n", 
-                                    rel->r_offset - offset, name, addend);
+                                    rel->r_offset - start_offset, name, addend);
                             break;
                         case R_PPC_REL24:
                             /* warning: must be at 32 MB distancy */
                             fprintf(outfile, "    *(uint32_t *)(gen_code_ptr + %d) = (*(uint32_t *)(gen_code_ptr + %d) & ~0x03fffffc) | ((%s - (long)(gen_code_ptr + %d) + %d) & 0x03fffffc);\n", 
-                                    rel->r_offset - offset, rel->r_offset - offset, name, rel->r_offset - offset, addend);
+                                    rel->r_offset - start_offset, rel->r_offset - start_offset, name, rel->r_offset - start_offset, addend);
                             break;
                         default:
                             error("unsupported powerpc relocation (%d)", type);
@@ -528,7 +560,8 @@ void gen_code(const char *name, host_ulo
                 int type;
                 int addend;
                 for(i = 0, rel = relocs;i < nb_relocs; i++, rel++) {
-                    if (rel->r_offset >= offset && rel->r_offset < offset + copy_size) {
+                    if (rel->r_offset >= start_offset &&
+			rel->r_offset < start_offset + copy_size) {
                         sym_name = strtab + symtab[ELFW(R_SYM)(rel->r_info)].st_name;
                         if (strstart(sym_name, "__op_param", &p)) {
                             snprintf(name, sizeof(name), "param%s", p);
@@ -540,15 +573,15 @@ void gen_code(const char *name, host_ulo
                         switch(type) {
                         case R_390_32:
                             fprintf(outfile, "    *(uint32_t *)(gen_code_ptr + %d) = %s + %d;\n", 
-                                    rel->r_offset - offset, name, addend);
+                                    rel->r_offset - start_offset, name, addend);
                             break;
                         case R_390_16:
                             fprintf(outfile, "    *(uint16_t *)(gen_code_ptr + %d) = %s + %d;\n", 
-                                    rel->r_offset - offset, name, addend);
+                                    rel->r_offset - start_offset, name, addend);
                             break;
                         case R_390_8:
                             fprintf(outfile, "    *(uint8_t *)(gen_code_ptr + %d) = %s + %d;\n", 
-                                    rel->r_offset - offset, name, addend);
+                                    rel->r_offset - start_offset, name, addend);
                             break;
                         default:
                             error("unsupported s390 relocation (%d)", type);
@@ -559,7 +592,7 @@ void gen_code(const char *name, host_ulo
 #elif defined(HOST_ALPHA)
             {
                 for (i = 0, rel = relocs; i < nb_relocs; i++, rel++) {
-		    if (rel->r_offset >= offset && rel->r_offset < offset + copy_size) {
+		    if (rel->r_offset >= start_offset && rel->r_offset < start_offset + copy_size) {
 			int type;
 
 			type = ELF64_R_TYPE(rel->r_info);
@@ -569,9 +602,9 @@ void gen_code(const char *name, host_ulo
 			    /* The gp is just 32 bit, and never changes, so it's easiest to emit it
 			       as an immediate instead of constructing it from the pv or ra.  */
 			    fprintf(outfile, "    immediate_ldah(gen_code_ptr + %ld, gp);\n",
-				    rel->r_offset - offset);
+				    rel->r_offset - start_offset);
 			    fprintf(outfile, "    immediate_lda(gen_code_ptr + %ld, gp);\n",
-				    rel->r_offset - offset + rel->r_addend);
+				    rel->r_offset - start_offset + rel->r_addend);
 			    break;
 			case R_ALPHA_LITUSE:
 			    /* jsr to literal hint. Could be used to optimize to bsr. Ignore for
@@ -591,18 +624,18 @@ void gen_code(const char *name, host_ulo
 			       special treatment.  */
 			    if (strstart(sym_name, "__op_param", &p))
 				fprintf(outfile, "    immediate_ldah(gen_code_ptr + %ld, param%s);\n",
-					rel->r_offset - offset, p);
+					rel->r_offset - start_offset, p);
 			    break;
 			case R_ALPHA_GPRELLOW:
 			    if (strstart(sym_name, "__op_param", &p))
 				fprintf(outfile, "    immediate_lda(gen_code_ptr + %ld, param%s);\n",
-					rel->r_offset - offset, p);
+					rel->r_offset - start_offset, p);
 			    break;
 			case R_ALPHA_BRSGP:
 			    /* PC-relative jump. Tweak offset to skip the two instructions that try to
 			       set up the gp from the pv.  */
 			    fprintf(outfile, "    fix_bsr(gen_code_ptr + %ld, (uint8_t *) &%s - (gen_code_ptr + %ld) + 4);\n",
-				    rel->r_offset - offset, sym_name, rel->r_offset - offset);
+				    rel->r_offset - start_offset, sym_name, rel->r_offset - start_offset);
 			    break;
 			default:
 			    error("unsupported Alpha relocation (%d)", type);
@@ -616,7 +649,7 @@ void gen_code(const char *name, host_ulo
                 int type;
                 int addend;
                 for(i = 0, rel = relocs;i < nb_relocs; i++, rel++) {
-                    if (rel->r_offset >= offset && rel->r_offset < offset + copy_size) {
+                    if (rel->r_offset >= start_offset && rel->r_offset < start_offset + copy_size) {
                         sym_name = strtab + symtab[ELF64_R_SYM(rel->r_info)].st_name;
                         if (strstart(sym_name, "__op_param", &p)) {
                             snprintf(name, sizeof(name), "param%s", p);
@@ -642,7 +675,8 @@ void gen_code(const char *name, host_ulo
                 int type;
                 int addend;
                 for(i = 0, rel = relocs;i < nb_relocs; i++, rel++) {
-                    if (rel->r_offset >= offset && rel->r_offset < offset + copy_size) {
+                    if (rel->r_offset >= start_offset &&
+			rel->r_offset < start_offset + copy_size) {
                         sym_name = strtab + symtab[ELF32_R_SYM(rel->r_info)].st_name;
                         if (strstart(sym_name, "__op_param", &p)) {
                             snprintf(name, sizeof(name), "param%s", p);
@@ -660,16 +694,16 @@ void gen_code(const char *name, host_ulo
                         switch(type) {
                         case R_SPARC_32:
                             fprintf(outfile, "    *(uint32_t *)(gen_code_ptr + %d) = %s + %d;\n", 
-                                    rel->r_offset - offset, name, addend);
+                                    rel->r_offset - start_offset, name, addend);
 			    break;
 			case R_SPARC_HI22:
                             fprintf(outfile,
 				    "    *(uint32_t *)(gen_code_ptr + %d) = "
 				    "((*(uint32_t *)(gen_code_ptr + %d)) "
 				    " & ~0x3fffff) "
-				    " | ((%s + %d) & 0x3fffff);\n",
-                                    rel->r_offset - offset,
-				    rel->r_offset - offset,
+				    " | (((%s + %d) >> 10) & 0x3fffff);\n",
+                                    rel->r_offset - start_offset,
+				    rel->r_offset - start_offset,
 				    name, addend);
 			    break;
 			case R_SPARC_LO10:
@@ -678,8 +712,8 @@ void gen_code(const char *name, host_ulo
 				    "((*(uint32_t *)(gen_code_ptr + %d)) "
 				    " & ~0x3ff) "
 				    " | ((%s + %d) & 0x3ff);\n",
-                                    rel->r_offset - offset,
-				    rel->r_offset - offset,
+                                    rel->r_offset - start_offset,
+				    rel->r_offset - start_offset,
 				    name, addend);
 			    break;
 			case R_SPARC_WDISP30:
@@ -687,11 +721,12 @@ void gen_code(const char *name, host_ulo
 				    "    *(uint32_t *)(gen_code_ptr + %d) = "
 				    "((*(uint32_t *)(gen_code_ptr + %d)) "
 				    " & ~0x3fffffff) "
-				    " | ((((%s + %d) - (long)gen_code_ptr)>>2) "
+				    " | ((((%s + %d) - (long)(gen_code_ptr + %d))>>2) "
 				    "    & 0x3fffffff);\n",
-				    rel->r_offset - offset,
-				    rel->r_offset - offset,
-				    name, addend);
+				    rel->r_offset - start_offset,
+				    rel->r_offset - start_offset,
+				    name, addend,
+				    rel->r_offset - start_offset);
 			    break;
                         default:
                             error("unsupported sparc relocation (%d)", type);
@@ -705,7 +740,8 @@ void gen_code(const char *name, host_ulo
                 int type;
                 int addend;
                 for(i = 0, rel = relocs;i < nb_relocs; i++, rel++) {
-                    if (rel->r_offset >= offset && rel->r_offset < offset + copy_size) {
+                    if (rel->r_offset >= start_offset &&
+			rel->r_offset < start_offset + copy_size) {
                         sym_name = strtab + symtab[ELF64_R_SYM(rel->r_info)].st_name;
                         if (strstart(sym_name, "__op_param", &p)) {
                             snprintf(name, sizeof(name), "param%s", p);
@@ -717,16 +753,16 @@ void gen_code(const char *name, host_ulo
                         switch(type) {
                         case R_SPARC_32:
                             fprintf(outfile, "    *(uint32_t *)(gen_code_ptr + %d) = %s + %d;\n",
-                                    rel->r_offset - offset, name, addend);
+                                    rel->r_offset - start_offset, name, addend);
 			    break;
 			case R_SPARC_HI22:
                             fprintf(outfile,
 				    "    *(uint32_t *)(gen_code_ptr + %d) = "
 				    "((*(uint32_t *)(gen_code_ptr + %d)) "
 				    " & ~0x3fffff) "
-				    " | ((%s + %d) & 0x3fffff);\n",
-                                    rel->r_offset - offset,
-				    rel->r_offset - offset,
+				    " | (((%s + %d) >> 10) & 0x3fffff);\n",
+                                    rel->r_offset - start_offset,
+				    rel->r_offset - start_offset,
 				    name, addend);
 			    break;
 			case R_SPARC_LO10:
@@ -735,8 +771,8 @@ void gen_code(const char *name, host_ulo
 				    "((*(uint32_t *)(gen_code_ptr + %d)) "
 				    " & ~0x3ff) "
 				    " | ((%s + %d) & 0x3ff);\n",
-                                    rel->r_offset - offset,
-				    rel->r_offset - offset,
+                                    rel->r_offset - start_offset,
+				    rel->r_offset - start_offset,
 				    name, addend);
 			    break;
 			case R_SPARC_WDISP30:
@@ -744,11 +780,12 @@ void gen_code(const char *name, host_ulo
 				    "    *(uint32_t *)(gen_code_ptr + %d) = "
 				    "((*(uint32_t *)(gen_code_ptr + %d)) "
 				    " & ~0x3fffffff) "
-				    " | ((((%s + %d) - (long)gen_code_ptr)>>2) "
+				    " | ((((%s + %d) - (long)(gen_code_ptr + %d))>>2) "
 				    "    & 0x3fffffff);\n",
-				    rel->r_offset - offset,
-				    rel->r_offset - offset,
-				    name, addend);
+				    rel->r_offset - start_offset,
+				    rel->r_offset - start_offset,
+				    name, addend,
+				    rel->r_offset - start_offset);
 			    break;
                         default:
 			    error("unsupported sparc64 relocation (%d)", type);
@@ -933,7 +970,22 @@ fprintf(outfile,
 "    const uint32_t *opparam_ptr;\n"
 "    gen_code_ptr = gen_code_buf;\n"
 "    opc_ptr = opc_buf;\n"
-"    opparam_ptr = opparam_buf;\n"
+"    opparam_ptr = opparam_buf;\n");
+
+	/* Generate prologue, if needed. */ 
+	switch(ELF_ARCH) {
+	case EM_SPARC:
+		fprintf(outfile, "*((uint32_t *)gen_code_ptr)++ = 0x9c23a080; /* sub %%sp, 128, %%sp */\n");
+		fprintf(outfile, "*((uint32_t *)gen_code_ptr)++ = 0xbc27a080; /* sub %%fp, 128, %%fp */\n");
+		break;
+
+	case EM_SPARCV9:
+		fprintf(outfile, "*((uint32_t *)gen_code_ptr)++ = 0x9c23a100; /* sub %%sp, 256, %%sp */\n");
+		fprintf(outfile, "*((uint32_t *)gen_code_ptr)++ = 0xbc27a100; /* sub %%fp, 256, %%fp */\n");
+		break;
+	};
+
+fprintf(outfile,
 "    for(;;) {\n"
 "        switch(*opc_ptr++) {\n"
 );
@@ -961,7 +1013,7 @@ fprintf(outfile,
 " the_end:\n"
 );
 
-/* generate a return */ 
+/* generate epilogue */ 
     switch(ELF_ARCH) {
     case EM_386:
         fprintf(outfile, "*gen_code_ptr++ = 0xc3; /* ret */\n");
@@ -980,11 +1032,13 @@ fprintf(outfile,
         break;
     case EM_SPARC:
     case EM_SPARC32PLUS:
+	fprintf(outfile, "*((uint32_t *)gen_code_ptr)++ = 0xbc07a080; /* add %%fp, 256, %%fp */\n");
+	fprintf(outfile, "*((uint32_t *)gen_code_ptr)++ = 0x81c62008; /* jmpl %%i0 + 8, %%g0 */\n");
+	fprintf(outfile, "*((uint32_t *)gen_code_ptr)++ = 0x9c03a080; /* add %%sp, 256, %%sp */\n");
+        break;
     case EM_SPARCV9:
-	/* Fill the delay slot. */
-	fprintf(outfile, "*((uint32_t *)gen_code_ptr) = *((uint32_t *)gen_code_ptr - 1); /* delay slot */\n");
-	fprintf(outfile, "*((uint32_t *)gen_code_ptr - 1) = 0x81c3e008; /* retl */\n");
-	fprintf(outfile, "gen_code_ptr++;\n");
+	fprintf(outfile, "*((uint32_t *)gen_code_ptr)++ = 0x81c7e008; /* ret */\n");
+	fprintf(outfile, "*((uint32_t *)gen_code_ptr)++ = 0x81e80000; /* restore */\n");
         break;
     default:
 	error("unknown ELF architecture");
--- ./exec-i386.c.~1~	Mon May 12 18:05:54 2003
+++ ./exec-i386.c	Mon May 12 18:06:01 2003
@@ -153,6 +153,13 @@ void raise_exception_err(int exception_i
 {
     /* NOTE: the register at this point must be saved by hand because
        longjmp restore them */
+#ifdef __sparc__
+	/* We have to stay in the same register window as our caller,
+	 * thus this trick.
+	 */
+	__asm__ __volatile__("restore\n\t"
+			     "mov\t%o0, %i0");
+#endif
 #ifdef reg_EAX
     env->regs[R_EAX] = EAX;
 #endif
@@ -409,7 +416,15 @@ int cpu_x86_exec(CPUX86State *env1)
             /* execute the generated code */
             tc_ptr = tb->tc_ptr;
             gen_func = (void *)tc_ptr;
+#ifdef __sparc__
+	    __asm__ __volatile__("call	%0\n\t"
+				 " mov	%%o7,%%i0"
+				 : /* no outputs */
+				 : "r" (gen_func)
+				 : "i0", "i1", "i2", "i3", "i4", "i5");
+#else
             gen_func();
+#endif
         }
     }
     ret = env->exception_index;
--- ./exec-i386.h.~1~	Mon May 12 18:05:54 2003
+++ ./exec-i386.h	Mon May 12 18:06:01 2003
@@ -89,11 +89,27 @@ register unsigned int A0 asm("s2");
 register struct CPUX86State *env asm("s3");
 #endif
 #ifdef __sparc__
-register unsigned int T0 asm("l0");
-register unsigned int T1 asm("l1");
-register unsigned int A0 asm("l2");
-register struct CPUX86State *env asm("l3");
+register unsigned int EAX asm("l0");
+register unsigned int ECX asm("l1");
+register unsigned int EDX asm("l2");
+register unsigned int EBX asm("l3");
+register unsigned int ESP asm("l4");
+register unsigned int EBP asm("l5");
+register unsigned int ESI asm("l6");
+register unsigned int EDI asm("l7");
+register unsigned int T0 asm("g1");
+register unsigned int T1 asm("g2");
+register unsigned int A0 asm("g3");
+register struct CPUX86State *env asm("g6");
 #define USE_FP_CONVERT
+#define reg_EAX
+#define reg_ECX
+#define reg_EDX
+#define reg_EBX
+#define reg_ESP
+#define reg_EBP
+#define reg_ESI
+#define reg_EDI
 #endif
 #ifdef __s390__
 register unsigned int T0 asm("r7");
--- ./Makefile.~1~	Mon May 12 18:05:54 2003
+++ ./Makefile	Mon May 12 18:13:20 2003
@@ -29,6 +29,18 @@ OP_CFLAGS=$(CFLAGS)
 LDFLAGS+=-Wl,-T,s390.ld
 endif
 
+ifeq ($(ARCH),sparc)
+CFLAGS+=-m32 -ffixed-g1 -ffixed-g2 -ffixed-g3 -ffixed-g6
+LDFLAGS+=-m32
+OP_CFLAGS=$(CFLAGS) -fno-delayed-branch -ffixed-i0
+endif
+
+ifeq ($(ARCH),sparc64)
+CFLAGS+=-m64 -ffixed-g1 -ffixed-g2 -ffixed-g3 -ffixed-g6
+LDFLAGS+=-m64
+OP_CFLAGS=$(CFLAGS) -fno-delayed-branch -ffixed-i0
+endif
+
 ifeq ($(ARCH),alpha)
 # -msmall-data is not used because we want two-instruction relocations
 # for the constant constructions
--- ./configure.~1~	Mon May 12 18:05:54 2003
+++ ./configure	Mon May 12 18:06:01 2003
@@ -47,6 +47,12 @@ case "$cpu" in
   s390)
     cpu="s390"
   ;;
+  sparc)
+    cpu="sparc"
+  ;;
+  sparc64)
+    cpu="sparc64"
+  ;;
   ia64)
     cpu="ia64"
   ;;
@@ -131,7 +137,7 @@ fi
 else
 
 # if cross compiling, cannot launch a program, so make a static guess
-if test "$cpu" = "powerpc" -o "$cpu" = "mips" -o "$cpu" = "s390" ; then
+if test "$cpu" = "powerpc" -o "$cpu" = "mips" -o "$cpu" = "s390" -o "$cpu" = "sparc" -o "$cpu" = "sparc64"; then
     bigendian="yes"
 fi
 
@@ -217,6 +223,12 @@ elif test "$cpu" = "s390" ; then
 elif test "$cpu" = "alpha" ; then
   echo "ARCH=alpha" >> config.mak
   echo "#define HOST_ALPHA 1" >> $TMPH
+elif test "$cpu" = "sparc" ; then
+  echo "ARCH=sparc" >> config.mak
+  echo "#define HOST_SPARC 1" >> $TMPH
+elif test "$cpu" = "sparc64" ; then
+  echo "ARCH=sparc64" >> config.mak
+  echo "#define HOST_SPARC64 1" >> $TMPH
 elif test "$cpu" = "ia64" ; then
   echo "ARCH=ia64" >> config.mak
   echo "#define HOST_IA64 1" >> $TMPH
^ permalink raw reply	[flat|nested] 10+ messages in thread- * [Qemu-devel] Re: [PATCH] Updated Sparc support
  2003-05-13  4:13 [Qemu-devel] [PATCH] Updated Sparc support David S. Miller
@ 2003-05-13 12:31 ` Fabrice Bellard
  2003-05-13 19:49   ` David S. Miller
  2003-05-14  0:13   ` Rusty Russell
  2003-05-15 13:03 ` [Qemu-devel] [PATCH] Updated Sparc support Johan Rydberg
  1 sibling, 2 replies; 10+ messages in thread
From: Fabrice Bellard @ 2003-05-13 12:31 UTC (permalink / raw)
  To: qemu-devel
David S. Miller wrote:
> Fabrice, here are updated patches for Sparc support.
> With the mmap changes you installed today, things work
> quite well :-)
Thanx! I'll commit them tonight.
> The current state is that for 32-bit Sparc static binaries seem to
> work and test-i386 passes except for 2 or 3 float tests which I will
> fix shortly.  My plan is to fix the float tests, make sure 64-bit
> sparc is working properly, then move on to playing with dynamically
> loaded libraries and making other improvements.
OK. There seems to be a bug in mmap.c when using 'qemu -p 16384 /bin/ls' 
on i386 (although -p 8192 and -p 32768 work!). I will investigate.
My next patches will mainly add support for code invalidation (when 
unloading dlls for example) and self-modifying code (by using mprotect 
to protect host pages containing writable translated code - that's why 
unlike em86 I chose to maintain mapping of the target pages instead of 
the host ones). It is a vital feature for good performance in 
dosemu. The support for the custom signal handler for SIGSEGV and SIGBUS 
in exec-i386.c will become mandatory for all host CPUs.
I also plan to add direct block chaining. I will try to make it portable 
by using the 'goto *' gcc extension, but I don't know yet if it will 
work on every CPU. The direct block chaining will generate something like:
  'goto *addr'
at the end of some translated blocks to jump either to the CPU core or 
directly to the next translated block. 'addr' will be a global 'void *' 
variable. Since no code will be patched to change block chaining, it 
will simplify the instruction cache invalidation issues and the 
threading issues.
Another subject: as you may have seen, most of the qemu code is designed 
to be independent of the target CPU. I hope that someone will have the 
time to add support for some other target CPUs such as PowerPC, ARM, 
MIPS or Sparc :-)
It would be a useful tool for embedded Linux software development or 
for automatic testing of Linux software without bothering to have 
accounts on every possible Linux host (it would ease the testing of qemu 
itself :-)).
Fabrice.
^ permalink raw reply	[flat|nested] 10+ messages in thread 
- * [Qemu-devel] Re: [PATCH] Updated Sparc support
  2003-05-13 12:31 ` [Qemu-devel] " Fabrice Bellard
@ 2003-05-13 19:49   ` David S. Miller
  2003-05-14  0:13   ` Rusty Russell
  1 sibling, 0 replies; 10+ messages in thread
From: David S. Miller @ 2003-05-13 19:49 UTC (permalink / raw)
  To: fabrice.bellard; +Cc: qemu-devel
   From: Fabrice Bellard <fabrice.bellard@free.fr>
   Date: Tue, 13 May 2003 14:31:22 +0200
   I also plan to add direct block chaining. I will try to make it portable 
   by using the 'goto *' gcc extension, but I don't know yet if it will 
   work on every CPU.
This should be OK, but we may run into issues on platforms that
have non-trivial epilogue/prologue in the code that they output.
I think on the most complex cpu in this area (Sparc :-) we should
be OK.
^ permalink raw reply	[flat|nested] 10+ messages in thread 
- * Re: [Qemu-devel] Re: [PATCH] Updated Sparc support
  2003-05-13 12:31 ` [Qemu-devel] " Fabrice Bellard
  2003-05-13 19:49   ` David S. Miller
@ 2003-05-14  0:13   ` Rusty Russell
  2003-05-14 11:48     ` Fabrice Bellard
  1 sibling, 1 reply; 10+ messages in thread
From: Rusty Russell @ 2003-05-14  0:13 UTC (permalink / raw)
  To: qemu-devel
In message <3EC0E59A.5070907@free.fr> you write:
> I also plan to add direct block chaining. I will try to make it portable 
> by using the 'goto *' gcc extension, but I don't know yet if it will 
> work on every CPU. The direct block chaining will generate something like:
> 
>   'goto *addr'
> 
> at the end of some translated blocks to jump either to the CPU core or 
> directly to the next translated block. 'addr' will be a global 'void *' 
> variable. Since no code will be patched to change block chaining, it 
> will simplify the instruction cache invalidation issues and the 
> threading issues.
Hmm, I had a more ambitious idea, and that was to keep simple stats on
which block last followed each block: if it goes to the same block
more than N times in a row, coalesce/chain them.
As blocks get longer, you have more opportunities for register
lifetime analysis, which could eliminate redundant stores to registers
in particular.
I haven't got actual code, so I haven't mentioned it before...
Thoughts?
Rusty.
--
  Anyone who quotes me in their sig is an idiot. -- Rusty Russell.
^ permalink raw reply	[flat|nested] 10+ messages in thread 
- * Re: [Qemu-devel] Re: [PATCH] Updated Sparc support
  2003-05-14  0:13   ` Rusty Russell
@ 2003-05-14 11:48     ` Fabrice Bellard
  2003-05-16 13:54       ` [Qemu-devel] Regression tests - syscall note Fabrice Bellard
  0 siblings, 1 reply; 10+ messages in thread
From: Fabrice Bellard @ 2003-05-14 11:48 UTC (permalink / raw)
  To: qemu-devel
Rusty Russell wrote:
> In message <3EC0E59A.5070907@free.fr> you write:
> 
>>I also plan to add direct block chaining. I will try to make it portable 
>>by using the 'goto *' gcc extension, but I don't know yet if it will 
>>work on every CPU. The direct block chaining will generate something like:
>>
>>  'goto *addr'
>>
>>at the end of some translated blocks to jump either to the CPU core or 
>>directly to the next translated block. 'addr' will be a global 'void *' 
>>variable. Since no code will be patched to change block chaining, it 
>>will simplify the instruction cache invalidation issues and the 
>>threading issues.
> 
> 
> Hmm, I had a more ambitious idea, and that was to keep simple stats on
> which block last followed each block: if it goes to the same block
> more than N times in a row, coalesce/chain them.
> 
> As blocks get longer, you have more opportunities for register
> lifetime analysis, which could eliminate redundant stores to registers
> in particular.
> 
> I haven't got actual code, so I haven't mentioned it before...
> 
> Thoughts?
It could be interesting to avoid some condition codes computations. 
Currently it is not possible to do more because qemu has no generic IR 
and I think I won't have the time to add one. Julian Seward (of the 
valgrind project) is thinking about adding a more generic IR in valgrind 
to allow cross debugging, so it might be interesting for valgrind.
BUT, I have a much simpler approach "a la FX!32" which has the advantage 
of being very simple and which needs very little modification in qemu:
You can launch your executable a first time to record statistics. Then 
you launch a special tool 'qemuopt' which statically generates a dynamic 
library with gcc containing the host cpu code of the most used basic 
block chains.
'qemuopt' is very easy to do : I discovered that by noting that gcc 
optimizes very well 'static inline' local functions. So you just have to 
generate a C source containing approximately:
void genfunc(CPUX86State *env)
{
     uint32_t T0, EAX, EBX, ...;
     EAX = env->regs[R_EAX];
     EBX = env->regs[R_EBX];
#define OPPROTO 'static inline'
#include "op-i386.c"
     op_movl_T0_EAX();
     op_movl_EBX_T0();
     env->regs[R_EAX] = EAX;
     env->regs[R_EBX] = EBX;
}
Then gcc does all the hard work for us :-)
Fabrice.
^ permalink raw reply	[flat|nested] 10+ messages in thread
- * [Qemu-devel] Regression tests - syscall note
  2003-05-14 11:48     ` Fabrice Bellard
@ 2003-05-16 13:54       ` Fabrice Bellard
  0 siblings, 0 replies; 10+ messages in thread
From: Fabrice Bellard @ 2003-05-16 13:54 UTC (permalink / raw)
  To: qemu-devel
Hi,
I just updated test-i386 so that it includes vm86, exceptions and 
self-modifying code tests. I hope it will help qemu ports on other CPUs.
About the Sparc and Alpha ports, some changes are needed in syscall.c to 
convert errno codes, signal numbers and various syscall bit fields. The 
corresponding code can be extracted from em86 if needed.
A more generic problem is that qemu currently uses 'errno' to get the 
error code of syscalls. The problem is that it is not thread safe as 
qemu does not rely on the glibc pthread library. My current idea is to 
call directly all the syscalls from qemu and to use the real Linux 
return value instead of errno. Is there a better solution ?
Fabrice.
^ permalink raw reply	[flat|nested] 10+ messages in thread 
 
 
 
- * Re: [Qemu-devel] [PATCH] Updated Sparc support
  2003-05-13  4:13 [Qemu-devel] [PATCH] Updated Sparc support David S. Miller
  2003-05-13 12:31 ` [Qemu-devel] " Fabrice Bellard
@ 2003-05-15 13:03 ` Johan Rydberg
  2003-05-15 20:05   ` David S. Miller
  1 sibling, 1 reply; 10+ messages in thread
From: Johan Rydberg @ 2003-05-15 13:03 UTC (permalink / raw)
  To: qemu-devel
David wrote:
> Like PPC, I now hold all the main x86 registers (EAX etc.) in Sparc
> registers.  This was tricky to accomplish with register windows, but
> finally it works...
Do you still use the register windows, or does GCC generate code without
save/restore?  Is this possible, really?
The reason for me asking is that I will some time soon "port" my simulator 
[1]
to SPARC hosts, and it needs to have a lot of global registers (more 
than
those 6-7 available).  I
> +ifeq ($(ARCH),sparc)
> +CFLAGS+=-m32 -ffixed-g1 -ffixed-g2 -ffixed-g3 -ffixed-g6
> +LDFLAGS+=-m32
> +OP_CFLAGS=$(CFLAGS) -fno-delayed-branch -ffixed-i0
> +endif
> +
> +ifeq ($(ARCH),sparc64)
> +CFLAGS+=-m64 -ffixed-g1 -ffixed-g2 -ffixed-g3 -ffixed-g6
> +LDFLAGS+=-m64
> +OP_CFLAGS=$(CFLAGS) -fno-delayed-branch -ffixed-i0
> +endif
Are the fixed flags really needed, or is it just because the global
register definitions are made in op-i386.c (or whatever)?
brgds,
johan
[1] http://www.nongnu.org/guss/
^ permalink raw reply	[flat|nested] 10+ messages in thread 
- * Re: [Qemu-devel] [PATCH] Updated Sparc support
  2003-05-15 13:03 ` [Qemu-devel] [PATCH] Updated Sparc support Johan Rydberg
@ 2003-05-15 20:05   ` David S. Miller
  2003-05-15 20:17     ` Johan Rydberg
  0 siblings, 1 reply; 10+ messages in thread
From: David S. Miller @ 2003-05-15 20:05 UTC (permalink / raw)
  To: qemu-devel, jrydberg
   From: Johan Rydberg <jrydberg@night.trouble.net>
   Date: Thu, 15 May 2003 15:03:33 +0200
   Do you still use the register windows, or does GCC generate code without
   save/restore?  Is this possible, really?
   
I used register windows.
Making GCC generate non-register windowed code using -mflat is
unusable because due to a bug it does not respect the
-fno-delayed-branch option when in that mode.
   The reason for me asking is that I some time soon "port" my simulator 
   [1] to SPARC hosts, and it needs to have a lot of global registers
   (more than those 6-7 available).
   
Just only execute the generated code in a fixed context and thus
in the same register window, then at the call site into the generated
code clobber all of the input registers.
Just look at the qemu code, it works quite well.
   Are the fixed flags really needed, or is it just because the global
   register definitions are made in op-i386.c (or whatever)?
   
You really do need this, because you cannot guarantee that every
source file will see the global register declarations in exec-i386.h
^ permalink raw reply	[flat|nested] 10+ messages in thread 
- * Re: [Qemu-devel] [PATCH] Updated Sparc support
  2003-05-15 20:05   ` David S. Miller
@ 2003-05-15 20:17     ` Johan Rydberg
  2003-05-15 20:24       ` David S. Miller
  0 siblings, 1 reply; 10+ messages in thread
From: Johan Rydberg @ 2003-05-15 20:17 UTC (permalink / raw)
  To: David S. Miller; +Cc: qemu-devel
On 2003.05.15 22:05 David S. Miller wrote:
>    The reason for me asking is that I some time soon "port" my
> simulator
>    [1] to SPARC hosts, and it needs to have a lot of global registers
>    (more than those 6-7 available).
> 
> Just only execute the generated code in a fixed context and thus
> in the same register window, then at the call site into the generated
> code clobber all of the input registers.
The global registers will initially not be used for dynamic generated
code, but in the direct threaded interpreter.  Using global registers
for commonly used variables such as the CPU structure and current 
virtual
program counter will increase the performance with a magnitude I guess.
So, if I understand you correctly, everything should work pretty good
if you use -fflat and stay away from the input registers?  (I have no
need for the -fno-delay-slot flag)
> Just look at the qemu code, it works quite well.
Will do that as well.
brgds,
Johan.
^ permalink raw reply	[flat|nested] 10+ messages in thread 
- * Re: [Qemu-devel] [PATCH] Updated Sparc support
  2003-05-15 20:17     ` Johan Rydberg
@ 2003-05-15 20:24       ` David S. Miller
  0 siblings, 0 replies; 10+ messages in thread
From: David S. Miller @ 2003-05-15 20:24 UTC (permalink / raw)
  To: jrydberg; +Cc: qemu-devel
   From: Johan Rydberg <jrydberg@night.trouble.net>
   Date: Thu, 15 May 2003 22:17:24 +0200
   Using global registers for commonly used variables such as the CPU
   structure and current virtual program counter will increase the
   performance with a magnitude I guess.
   
   So, if I understand you correctly, everything should work pretty good
   if you use -fflat and stay away from the input registers?  (I have no
   need for the -fno-delay-slot flag)
   
-mflat SUCKS, it generates the worst code possible.
Also, if you're going to use global register, be forewarned that
if you call into libc you risk them being clobbered.  Sparc-linux
libc clobbers just about every global register in some piece of
assembly somewhere.
There are also very strict rules about which global register an
application can use in this fashion.  Please refer to the standards
documents about exactly which these are (they are different for 32-bit
and 64-bit sparc) and no I don't have time to discuss this further
nor point you at where to get these documents.:-)
^ permalink raw reply	[flat|nested] 10+ messages in thread 
 
 
 
end of thread, other threads:[~2003-05-16 14:34 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2003-05-13  4:13 [Qemu-devel] [PATCH] Updated Sparc support David S. Miller
2003-05-13 12:31 ` [Qemu-devel] " Fabrice Bellard
2003-05-13 19:49   ` David S. Miller
2003-05-14  0:13   ` Rusty Russell
2003-05-14 11:48     ` Fabrice Bellard
2003-05-16 13:54       ` [Qemu-devel] Regression tests - syscall note Fabrice Bellard
2003-05-15 13:03 ` [Qemu-devel] [PATCH] Updated Sparc support Johan Rydberg
2003-05-15 20:05   ` David S. Miller
2003-05-15 20:17     ` Johan Rydberg
2003-05-15 20:24       ` David S. Miller
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).