qemu-devel.nongnu.org archive mirror
* [Qemu-devel] [4799] Add instruction counter.
@ 2008-06-29  1:03 Paul Brook
From: Paul Brook @ 2008-06-29  1:03 UTC (permalink / raw)
  To: qemu-devel

Revision: 4799
          http://svn.sv.gnu.org/viewvc/?view=rev&root=qemu&revision=4799
Author:   pbrook
Date:     2008-06-29 01:03:05 +0000 (Sun, 29 Jun 2008)

Log Message:
-----------
Add instruction counter.

Modified Paths:
--------------
    trunk/cpu-all.h
    trunk/cpu-defs.h
    trunk/cpu-exec.c
    trunk/exec-all.h
    trunk/exec.c
    trunk/hw/mips_timer.c
    trunk/qemu-doc.texi
    trunk/softmmu_template.h
    trunk/target-alpha/cpu.h
    trunk/target-alpha/translate.c
    trunk/target-arm/cpu.h
    trunk/target-arm/translate.c
    trunk/target-cris/cpu.h
    trunk/target-cris/translate.c
    trunk/target-i386/cpu.h
    trunk/target-i386/translate.c
    trunk/target-m68k/cpu.h
    trunk/target-m68k/translate.c
    trunk/target-mips/cpu.h
    trunk/target-mips/translate.c
    trunk/target-ppc/cpu.h
    trunk/target-ppc/helper.c
    trunk/target-ppc/translate.c
    trunk/target-sh4/cpu.h
    trunk/target-sh4/translate.c
    trunk/target-sparc/cpu.h
    trunk/target-sparc/translate.c
    trunk/translate-all.c
    trunk/vl.c

Modified: trunk/cpu-all.h
===================================================================
--- trunk/cpu-all.h	2008-06-29 01:00:34 UTC (rev 4798)
+++ trunk/cpu-all.h	2008-06-29 01:03:05 UTC (rev 4799)
@@ -782,6 +782,8 @@
     __attribute__ ((__noreturn__));
 extern CPUState *first_cpu;
 extern CPUState *cpu_single_env;
+extern int64_t qemu_icount;
+extern int use_icount;
 
 #define CPU_INTERRUPT_EXIT   0x01 /* wants exit from main loop */
 #define CPU_INTERRUPT_HARD   0x02 /* hardware interrupt pending */

Modified: trunk/cpu-defs.h
===================================================================
--- trunk/cpu-defs.h	2008-06-29 01:00:34 UTC (rev 4798)
+++ trunk/cpu-defs.h	2008-06-29 01:03:05 UTC (rev 4799)
@@ -130,17 +130,29 @@
                    sizeof(target_phys_addr_t))];
 } CPUTLBEntry;
 
+#ifdef WORDS_BIGENDIAN
+typedef struct icount_decr_u16 {
+    uint16_t high;
+    uint16_t low;
+} icount_decr_u16;
+#else
+typedef struct icount_decr_u16 {
+    uint16_t low;
+    uint16_t high;
+} icount_decr_u16;
+#endif
+
 #define CPU_TEMP_BUF_NLONGS 128
 #define CPU_COMMON                                                      \
     struct TranslationBlock *current_tb; /* currently executing TB  */  \
     /* soft mmu support */                                              \
-    /* in order to avoid passing too many arguments to the memory       \
-       write helpers, we store some rarely used information in the CPU  \
+    /* in order to avoid passing too many arguments to the MMIO         \
+       helpers, we store some rarely used information in the CPU        \
        context) */                                                      \
-    unsigned long mem_write_pc; /* host pc at which the memory was      \
-                                   written */                           \
-    target_ulong mem_write_vaddr; /* target virtual addr at which the   \
-                                     memory was written */              \
+    unsigned long mem_io_pc; /* host pc at which the memory was         \
+                                accessed */                             \
+    target_ulong mem_io_vaddr; /* target virtual addr at which the      \
+                                     memory was accessed */             \
     int halted; /* TRUE if the CPU is in suspend state */               \
     /* The meaning of the MMU modes is defined in the target code. */   \
     CPUTLBEntry tlb_table[NB_MMU_MODES][CPU_TLB_SIZE];                  \
@@ -149,6 +161,16 @@
     /* buffer for temporaries in the code generator */                  \
     long temp_buf[CPU_TEMP_BUF_NLONGS];                                 \
                                                                         \
+    int64_t icount_extra; /* Instructions until next timer event.  */   \
+    /* Number of cycles left, with interrupt flag in high bit.          \
+       This allows a single read-compare-cbranch-write sequence to test \
+       for both decrementer underflow and exceptions.  */               \
+    union {                                                             \
+        uint32_t u32;                                                   \
+        icount_decr_u16 u16;                                            \
+    } icount_decr;                                                      \
+    uint32_t can_do_io; /* nonzero if memory mapped IO is safe.  */     \
+                                                                        \
     /* from this point: preserved by CPU reset */                       \
     /* ice debug support */                                             \
     target_ulong breakpoints[MAX_BREAKPOINTS];                          \

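A minimal sketch of how the packed icount_decr field above is meant to be used
(illustrative C only; the real check is emitted as TCG ops by
gen_icount_start()/gen_icount_end(), and the helper name and insns_in_tb
parameter here are invented for the example).  cpu_interrupt() sets u16.high to
0x8000, so a single signed comparison catches both decrementer underflow and a
pending interrupt:

    /* Rough C equivalent of the per-TB entry check; returns 0 to exit
       to the main loop, 1 to run the block.  */
    static int icount_tb_entry(CPUState *env, uint16_t insns_in_tb)
    {
        int32_t count = (int32_t)env->icount_decr.u32 - insns_in_tb;
        if (count < 0) {
            /* Low half underflowed, or cpu_interrupt() set the high
               half to 0x8000: leave translated code.  */
            return 0;
        }
        env->icount_decr.u16.low = (uint16_t)count; /* high half untouched */
        return 1;
    }
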
Modified: trunk/cpu-exec.c
===================================================================
--- trunk/cpu-exec.c	2008-06-29 01:00:34 UTC (rev 4798)
+++ trunk/cpu-exec.c	2008-06-29 01:03:05 UTC (rev 4799)
@@ -82,15 +82,40 @@
     longjmp(env->jmp_env, 1);
 }
 
+/* Execute the code without caching the generated code. An interpreter
+   could be used if available. */
+static void cpu_exec_nocache(int max_cycles, TranslationBlock *orig_tb)
+{
+    unsigned long next_tb;
+    TranslationBlock *tb;
+
+    /* Should never happen.
+       We only end up here when an existing TB is too long.  */
+    if (max_cycles > CF_COUNT_MASK)
+        max_cycles = CF_COUNT_MASK;
+
+    tb = tb_gen_code(env, orig_tb->pc, orig_tb->cs_base, orig_tb->flags,
+                     max_cycles);
+    env->current_tb = tb;
+    /* execute the generated code */
+    next_tb = tcg_qemu_tb_exec(tb->tc_ptr);
+
+    if ((next_tb & 3) == 2) {
+        /* Restore PC.  This may happen if an async event occurs before
+           the TB starts executing.  */
+        CPU_PC_FROM_TB(env, tb);
+    }
+    tb_phys_invalidate(tb, -1);
+    tb_free(tb);
+}
+
 static TranslationBlock *tb_find_slow(target_ulong pc,
                                       target_ulong cs_base,
                                       uint64_t flags)
 {
     TranslationBlock *tb, **ptb1;
-    int code_gen_size;
     unsigned int h;
     target_ulong phys_pc, phys_page1, phys_page2, virt_page2;
-    uint8_t *tc_ptr;
 
     tb_invalidated_flag = 0;
 
@@ -124,31 +149,9 @@
         ptb1 = &tb->phys_hash_next;
     }
  not_found:
-    /* if no translated code available, then translate it now */
-    tb = tb_alloc(pc);
-    if (!tb) {
-        /* flush must be done */
-        tb_flush(env);
-        /* cannot fail at this point */
-        tb = tb_alloc(pc);
-        /* don't forget to invalidate previous TB info */
-        tb_invalidated_flag = 1;
-    }
-    tc_ptr = code_gen_ptr;
-    tb->tc_ptr = tc_ptr;
-    tb->cs_base = cs_base;
-    tb->flags = flags;
-    cpu_gen_code(env, tb, &code_gen_size);
-    code_gen_ptr = (void *)(((unsigned long)code_gen_ptr + code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
+   /* if no translated code available, then translate it now */
+    tb = tb_gen_code(env, pc, cs_base, flags, 0);
 
-    /* check next page if needed */
-    virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
-    phys_page2 = -1;
-    if ((pc & TARGET_PAGE_MASK) != virt_page2) {
-        phys_page2 = get_phys_addr_code(env, virt_page2);
-    }
-    tb_link_phys(tb, phys_pc, phys_page2);
-
  found:
     /* we add the TB in the virtual pc hash table */
     env->tb_jmp_cache[tb_jmp_cache_hash_func(pc)] = tb;
@@ -583,6 +586,7 @@
                        of memory exceptions while generating the code, we
                        must recompute the hash index here */
                     next_tb = 0;
+                    tb_invalidated_flag = 0;
                 }
 #ifdef DEBUG_EXEC
                 if ((loglevel & CPU_LOG_EXEC)) {
@@ -604,16 +608,45 @@
                 }
                 }
                 spin_unlock(&tb_lock);
-                tc_ptr = tb->tc_ptr;
                 env->current_tb = tb;
+                while (env->current_tb) {
+                    tc_ptr = tb->tc_ptr;
                 /* execute the generated code */
 #if defined(__sparc__) && !defined(HOST_SOLARIS)
 #undef env
-                env = cpu_single_env;
+                    env = cpu_single_env;
 #define env cpu_single_env
 #endif
-                next_tb = tcg_qemu_tb_exec(tc_ptr);
-                env->current_tb = NULL;
+                    next_tb = tcg_qemu_tb_exec(tc_ptr);
+                    env->current_tb = NULL;
+                    if ((next_tb & 3) == 2) {
+                        /* Instruction counter expired.  */
+                        int insns_left;
+                        tb = (TranslationBlock *)(long)(next_tb & ~3);
+                        /* Restore PC.  */
+                        CPU_PC_FROM_TB(env, tb);
+                        insns_left = env->icount_decr.u32;
+                        if (env->icount_extra && insns_left >= 0) {
+                            /* Refill decrementer and continue execution.  */
+                            env->icount_extra += insns_left;
+                            if (env->icount_extra > 0xffff) {
+                                insns_left = 0xffff;
+                            } else {
+                                insns_left = env->icount_extra;
+                            }
+                            env->icount_extra -= insns_left;
+                            env->icount_decr.u16.low = insns_left;
+                        } else {
+                            if (insns_left > 0) {
+                                /* Execute remaining instructions.  */
+                                cpu_exec_nocache(insns_left, tb);
+                            }
+                            env->exception_index = EXCP_INTERRUPT;
+                            next_tb = 0;
+                            cpu_loop_exit();
+                        }
+                    }
+                }
                 /* reset soft MMU for next block (it can currently
                    only be set by a memory fault) */
 #if defined(USE_KQEMU)

Modified: trunk/exec-all.h
===================================================================
--- trunk/exec-all.h	2008-06-29 01:00:34 UTC (rev 4798)
+++ trunk/exec-all.h	2008-06-29 01:03:05 UTC (rev 4799)
@@ -27,7 +27,7 @@
 #define DISAS_UPDATE  2 /* cpu state was modified dynamically */
 #define DISAS_TB_JUMP 3 /* only pc was modified statically */
 
-struct TranslationBlock;
+typedef struct TranslationBlock TranslationBlock;
 
 /* XXX: make safe guess about sizes */
 #define MAX_OP_PER_INSTR 64
@@ -48,6 +48,7 @@
 extern target_ulong gen_opc_npc[OPC_BUF_SIZE];
 extern uint8_t gen_opc_cc_op[OPC_BUF_SIZE];
 extern uint8_t gen_opc_instr_start[OPC_BUF_SIZE];
+extern uint16_t gen_opc_icount[OPC_BUF_SIZE];
 extern target_ulong gen_opc_jump_pc[2];
 extern uint32_t gen_opc_hflags[OPC_BUF_SIZE];
 
@@ -75,6 +76,10 @@
                            CPUState *env, unsigned long searched_pc,
                            void *puc);
 void cpu_resume_from_signal(CPUState *env1, void *puc);
+void cpu_io_recompile(CPUState *env, void *retaddr);
+TranslationBlock *tb_gen_code(CPUState *env, 
+                              target_ulong pc, target_ulong cs_base, int flags,
+                              int cflags);
 void cpu_exec_init(CPUState *env);
 int page_unprotect(target_ulong address, unsigned long pc, void *puc);
 void tb_invalidate_phys_page_range(target_phys_addr_t start, target_phys_addr_t end,
@@ -117,16 +122,15 @@
 #define USE_DIRECT_JUMP
 #endif
 
-typedef struct TranslationBlock {
+struct TranslationBlock {
     target_ulong pc;   /* simulated PC corresponding to this block (EIP + CS base) */
     target_ulong cs_base; /* CS base for this block */
     uint64_t flags; /* flags defining in which context the code was generated */
     uint16_t size;      /* size of target code for this block (1 <=
                            size <= TARGET_PAGE_SIZE) */
     uint16_t cflags;    /* compile flags */
-#define CF_TB_FP_USED  0x0002 /* fp ops are used in the TB */
-#define CF_FP_USED     0x0004 /* fp ops are used in the TB or in a chained TB */
-#define CF_SINGLE_INSN 0x0008 /* compile only a single instruction */
+#define CF_COUNT_MASK  0x7fff
+#define CF_LAST_IO     0x8000 /* Last insn may be an IO access.  */
 
     uint8_t *tc_ptr;    /* pointer to the translated code */
     /* next matching tb for physical address. */
@@ -150,7 +154,8 @@
        jmp_first */
     struct TranslationBlock *jmp_next[2];
     struct TranslationBlock *jmp_first;
-} TranslationBlock;
+    uint32_t icount;
+};
 
 static inline unsigned int tb_jmp_cache_hash_page(target_ulong pc)
 {
@@ -173,9 +178,11 @@
 }
 
 TranslationBlock *tb_alloc(target_ulong pc);
+void tb_free(TranslationBlock *tb);
 void tb_flush(CPUState *env);
 void tb_link_phys(TranslationBlock *tb,
                   target_ulong phys_pc, target_ulong phys_page2);
+void tb_phys_invalidate(TranslationBlock *tb, target_ulong page_addr);
 
 extern TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
 extern uint8_t *code_gen_ptr;
@@ -364,6 +371,20 @@
     }
     return addr + env1->tlb_table[mmu_idx][page_index].addend - (unsigned long)phys_ram_base;
 }
+
+/* Deterministic execution requires that IO only be performed on the last
+   instruction of a TB so that interrupts take effect immediately.  */
+static inline int can_do_io(CPUState *env)
+{
+    if (!use_icount)
+        return 1;
+
+    /* If not executing code then assume we are ok.  */
+    if (!env->current_tb)
+        return 1;
+
+    return env->can_do_io != 0;
+}
 #endif
 
 #ifdef USE_KQEMU

Modified: trunk/exec.c
===================================================================
--- trunk/exec.c	2008-06-29 01:00:34 UTC (rev 4798)
+++ trunk/exec.c	2008-06-29 01:03:05 UTC (rev 4799)
@@ -107,6 +107,13 @@
 /* current CPU in the current thread. It is only valid inside
    cpu_exec() */
 CPUState *cpu_single_env;
+/* 0 = Do not count executed instructions.
+   1 = Precise instruction counting.
+   2 = Adaptive rate instruction counting.  */
+int use_icount = 0;
+/* Current instruction counter.  While executing translated code this may
+   include some instructions that have not yet been executed.  */
+int64_t qemu_icount;
 
 typedef struct PageDesc {
     /* list of TBs intersecting this ram page */
@@ -633,7 +640,7 @@
     tb_set_jmp_target(tb, n, (unsigned long)(tb->tc_ptr + tb->tb_next_offset[n]));
 }
 
-static inline void tb_phys_invalidate(TranslationBlock *tb, target_ulong page_addr)
+void tb_phys_invalidate(TranslationBlock *tb, target_ulong page_addr)
 {
     CPUState *env;
     PageDesc *p;
@@ -746,11 +753,9 @@
     }
 }
 
-#ifdef TARGET_HAS_PRECISE_SMC
-
-static void tb_gen_code(CPUState *env,
-                        target_ulong pc, target_ulong cs_base, int flags,
-                        int cflags)
+TranslationBlock *tb_gen_code(CPUState *env,
+                              target_ulong pc, target_ulong cs_base,
+                              int flags, int cflags)
 {
     TranslationBlock *tb;
     uint8_t *tc_ptr;
@@ -764,6 +769,8 @@
         tb_flush(env);
         /* cannot fail at this point */
         tb = tb_alloc(pc);
+        /* Don't forget to invalidate previous TB info.  */
+        tb_invalidated_flag = 1;
     }
     tc_ptr = code_gen_ptr;
     tb->tc_ptr = tc_ptr;
@@ -780,8 +787,8 @@
         phys_page2 = get_phys_addr_code(env, virt_page2);
     }
     tb_link_phys(tb, phys_pc, phys_page2);
+    return tb;
 }
-#endif
 
 /* invalidate all TBs which intersect with the target physical page
    starting in range [start;end[. NOTE: start and end must refer to
@@ -836,13 +843,13 @@
             if (current_tb_not_found) {
                 current_tb_not_found = 0;
                 current_tb = NULL;
-                if (env->mem_write_pc) {
+                if (env->mem_io_pc) {
                     /* now we have a real cpu fault */
-                    current_tb = tb_find_pc(env->mem_write_pc);
+                    current_tb = tb_find_pc(env->mem_io_pc);
                 }
             }
             if (current_tb == tb &&
-                !(current_tb->cflags & CF_SINGLE_INSN)) {
+                (current_tb->cflags & CF_COUNT_MASK) != 1) {
                 /* If we are modifying the current TB, we must stop
                 its execution. We could be more precise by checking
                 that the modification is after the current PC, but it
@@ -851,7 +858,7 @@
 
                 current_tb_modified = 1;
                 cpu_restore_state(current_tb, env,
-                                  env->mem_write_pc, NULL);
+                                  env->mem_io_pc, NULL);
 #if defined(TARGET_I386)
                 current_flags = env->hflags;
                 current_flags |= (env->eflags & (IOPL_MASK | TF_MASK | VM_MASK));
@@ -883,7 +890,7 @@
     if (!p->first_tb) {
         invalidate_page_bitmap(p);
         if (is_cpu_write_access) {
-            tlb_unprotect_code_phys(env, start, env->mem_write_vaddr);
+            tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
         }
     }
 #endif
@@ -893,8 +900,7 @@
            modifying the memory. It will ensure that it cannot modify
            itself */
         env->current_tb = NULL;
-        tb_gen_code(env, current_pc, current_cs_base, current_flags,
-                    CF_SINGLE_INSN);
+        tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
         cpu_resume_from_signal(env, NULL);
     }
 #endif
@@ -909,7 +915,7 @@
     if (1) {
         if (loglevel) {
             fprintf(logfile, "modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
-                   cpu_single_env->mem_write_vaddr, len,
+                   cpu_single_env->mem_io_vaddr, len,
                    cpu_single_env->eip,
                    cpu_single_env->eip + (long)cpu_single_env->segs[R_CS].base);
         }
@@ -961,7 +967,7 @@
         tb = (TranslationBlock *)((long)tb & ~3);
 #ifdef TARGET_HAS_PRECISE_SMC
         if (current_tb == tb &&
-            !(current_tb->cflags & CF_SINGLE_INSN)) {
+            (current_tb->cflags & CF_COUNT_MASK) != 1) {
                 /* If we are modifying the current TB, we must stop
                    its execution. We could be more precise by checking
                    that the modification is after the current PC, but it
@@ -990,8 +996,7 @@
            modifying the memory. It will ensure that it cannot modify
            itself */
         env->current_tb = NULL;
-        tb_gen_code(env, current_pc, current_cs_base, current_flags,
-                    CF_SINGLE_INSN);
+        tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
         cpu_resume_from_signal(env, puc);
     }
 #endif
@@ -1068,6 +1073,17 @@
     return tb;
 }
 
+void tb_free(TranslationBlock *tb)
+{
+    /* In practice this is mostly used for single-use temporary TBs.
+       Ignore the hard cases and just back up if this TB happens to
+       be the last one generated.  */
+    if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
+        code_gen_ptr = tb->tc_ptr;
+        nb_tbs--;
+    }
+}
+
 /* add a new TB and link it to the physical page tables. phys_page2 is
    (-1) to indicate that only one page contains the TB. */
 void tb_link_phys(TranslationBlock *tb,
@@ -1369,7 +1385,9 @@
     TranslationBlock *tb;
     static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
 #endif
+    int old_mask;
 
+    old_mask = env->interrupt_request;
     /* FIXME: This is probably not threadsafe.  A different thread could
       be in the middle of a read-modify-write operation.  */
     env->interrupt_request |= mask;
@@ -1379,13 +1397,25 @@
        emulation this often isn't actually as bad as it sounds.  Often
        signals are used primarily to interrupt blocking syscalls.  */
 #else
-    /* if the cpu is currently executing code, we must unlink it and
-       all the potentially executing TB */
-    tb = env->current_tb;
-    if (tb && !testandset(&interrupt_lock)) {
-        env->current_tb = NULL;
-        tb_reset_jump_recursive(tb);
-        resetlock(&interrupt_lock);
+    if (use_icount) {
+        env->icount_decr.u16.high = 0x8000;
+#ifndef CONFIG_USER_ONLY
+        /* CPU_INTERRUPT_EXIT isn't a real interrupt.  It just means
+           an async event happened and we need to process it.  */
+        if (!can_do_io(env)
+            && (mask & ~(old_mask | CPU_INTERRUPT_EXIT)) != 0) {
+            cpu_abort(env, "Raised interrupt while not in I/O function");
+        }
+#endif
+    } else {
+        tb = env->current_tb;
+        /* if the cpu is currently executing code, we must unlink it and
+           all the potentially executing TB */
+        if (tb && !testandset(&interrupt_lock)) {
+            env->current_tb = NULL;
+            tb_reset_jump_recursive(tb);
+            resetlock(&interrupt_lock);
+        }
     }
 #endif
 }
@@ -2227,7 +2257,7 @@
     /* we remove the notdirty callback only if the code has been
        flushed */
     if (dirty_flags == 0xff)
-        tlb_set_dirty(cpu_single_env, cpu_single_env->mem_write_vaddr);
+        tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
 }
 
 static void notdirty_mem_writew(void *opaque, target_phys_addr_t ram_addr,
@@ -2252,7 +2282,7 @@
     /* we remove the notdirty callback only if the code has been
        flushed */
     if (dirty_flags == 0xff)
-        tlb_set_dirty(cpu_single_env, cpu_single_env->mem_write_vaddr);
+        tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
 }
 
 static void notdirty_mem_writel(void *opaque, target_phys_addr_t ram_addr,
@@ -2277,7 +2307,7 @@
     /* we remove the notdirty callback only if the code has been
        flushed */
     if (dirty_flags == 0xff)
-        tlb_set_dirty(cpu_single_env, cpu_single_env->mem_write_vaddr);
+        tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
 }
 
 static CPUReadMemoryFunc *error_mem_read[3] = {
@@ -2299,7 +2329,7 @@
     target_ulong vaddr;
     int i;
 
-    vaddr = (env->mem_write_vaddr & TARGET_PAGE_MASK) + offset;
+    vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
     for (i = 0; i < env->nb_watchpoints; i++) {
         if (vaddr == env->watchpoint[i].vaddr
                 && (env->watchpoint[i].type & flags)) {
@@ -2967,6 +2997,65 @@
     return 0;
 }
 
+/* in deterministic execution mode, instructions doing device I/Os
+   must be at the end of the TB */
+void cpu_io_recompile(CPUState *env, void *retaddr)
+{
+    TranslationBlock *tb;
+    uint32_t n, cflags;
+    target_ulong pc, cs_base;
+    uint64_t flags;
+
+    tb = tb_find_pc((unsigned long)retaddr);
+    if (!tb) {
+        cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p", 
+                  retaddr);
+    }
+    n = env->icount_decr.u16.low + tb->icount;
+    cpu_restore_state(tb, env, (unsigned long)retaddr, NULL);
+    /* Calculate how many instructions had been executed before the fault
+       occurred.  */
+    n = n - env->icount_decr.u16.low;
+    /* Generate a new TB ending on the I/O insn.  */
+    n++;
+    /* On MIPS and SH, delay slot instructions can only be restarted if
+       they were already the first instruction in the TB.  If this is not
+       the first instruction in a TB then re-execute the preceding
+       branch.  */
+#if defined(TARGET_MIPS)
+    if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
+        env->active_tc.PC -= 4;
+        env->icount_decr.u16.low++;
+        env->hflags &= ~MIPS_HFLAG_BMASK;
+    }
+#elif defined(TARGET_SH4)
+    if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
+            && n > 1) {
+        env->pc -= 2;
+        env->icount_decr.u16.low++;
+        env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
+    }
+#endif
+    /* This should never happen.  */
+    if (n > CF_COUNT_MASK)
+        cpu_abort(env, "TB too big during recompile");
+
+    cflags = n | CF_LAST_IO;
+    pc = tb->pc;
+    cs_base = tb->cs_base;
+    flags = tb->flags;
+    tb_phys_invalidate(tb, -1);
+    /* FIXME: In theory this could raise an exception.  In practice
+       we have already translated the block once so it's probably ok.  */
+    tb_gen_code(env, pc, cs_base, flags, cflags);
+    /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
+       the first in the TB) then we end up generating a whole new TB and
+       repeating the fault, which is horribly inefficient.
+       Better would be to execute just this insn uncached, or generate a
+       second new TB.  */
+    cpu_resume_from_signal(env, NULL);
+}
+
 void dump_exec_info(FILE *f,
                     int (*cpu_fprintf)(FILE *f, const char *fmt, ...))
 {

Modified: trunk/hw/mips_timer.c
===================================================================
--- trunk/hw/mips_timer.c	2008-06-29 01:00:34 UTC (rev 4798)
+++ trunk/hw/mips_timer.c	2008-06-29 01:03:05 UTC (rev 4799)
@@ -91,7 +91,12 @@
     if (env->CP0_Cause & (1 << CP0Ca_DC))
         return;
 
+    /* ??? This callback should occur when the counter is exactly equal to
+       the comparator value.  Offset the count by one to avoid immediately
+       retriggering the callback before any virtual time has passed.  */
+    env->CP0_Count++;
     cpu_mips_timer_update(env);
+    env->CP0_Count--;
     if (env->insn_flags & ISA_MIPS32R2)
         env->CP0_Cause |= 1 << CP0Ca_TI;
     qemu_irq_raise(env->irq[(env->CP0_IntCtl >> CP0IntCtl_IPTI) & 0x7]);

Modified: trunk/qemu-doc.texi
===================================================================
--- trunk/qemu-doc.texi	2008-06-29 01:00:34 UTC (rev 4798)
+++ trunk/qemu-doc.texi	2008-06-29 01:03:05 UTC (rev 4799)
@@ -965,6 +965,17 @@
 
 Note that this allows guest direct access to the host filesystem,
 so should only be used with trusted guest OS.
+
+@item -icount [N|auto]
+Enable virtual instruction counter.  The virtual cpu will execute one
+instruction every 2^N ns of virtual time.  If @code{auto} is specified
+then the virtual cpu speed will be automatically adjusted to keep virtual
+time within a few seconds of real time.
+
+Note that while this option can give deterministic behavior, it does not
+provide cycle-accurate emulation.  Modern CPUs contain superscalar out-of-order
+cores with complex cache hierarchies.  The number of instructions
+executed often has little or no correlation with actual performance.
 @end table
 
 @c man end

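For reference, the option documented above is used like any other command-line
switch; a minimal invocation (the machine and kernel arguments are only
illustrative, any working guest will do):

    qemu-system-arm -M versatilepb -kernel zImage -icount 6

runs the virtual CPU at one instruction per 2^6 ns of virtual time, while
-icount auto lets QEMU adjust the rate to roughly track real time.
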
Modified: trunk/softmmu_template.h
===================================================================
--- trunk/softmmu_template.h	2008-06-29 01:00:34 UTC (rev 4798)
+++ trunk/softmmu_template.h	2008-06-29 01:03:05 UTC (rev 4799)
@@ -51,12 +51,18 @@
                                                         int mmu_idx,
                                                         void *retaddr);
 static inline DATA_TYPE glue(io_read, SUFFIX)(target_phys_addr_t physaddr,
-                                              target_ulong addr)
+                                              target_ulong addr,
+                                              void *retaddr)
 {
     DATA_TYPE res;
     int index;
     index = (physaddr >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
     physaddr = (physaddr & TARGET_PAGE_MASK) + addr;
+    env->mem_io_pc = (unsigned long)retaddr;
+    if (index > (IO_MEM_NOTDIRTY >> IO_MEM_SHIFT)
+            && !can_do_io(env)) {
+        cpu_io_recompile(env, retaddr);
+    }
 
 #if SHIFT <= 2
     res = io_mem_read[index][SHIFT](io_mem_opaque[index], physaddr);
@@ -95,8 +101,9 @@
             /* IO access */
             if ((addr & (DATA_SIZE - 1)) != 0)
                 goto do_unaligned_access;
+            retaddr = GETPC();
             addend = env->iotlb[mmu_idx][index];
-            res = glue(io_read, SUFFIX)(addend, addr);
+            res = glue(io_read, SUFFIX)(addend, addr, retaddr);
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
             /* slow unaligned access (it spans two pages or IO) */
         do_unaligned_access:
@@ -148,8 +155,9 @@
             /* IO access */
             if ((addr & (DATA_SIZE - 1)) != 0)
                 goto do_unaligned_access;
+            retaddr = GETPC();
             addend = env->iotlb[mmu_idx][index];
-            res = glue(io_read, SUFFIX)(addend, addr);
+            res = glue(io_read, SUFFIX)(addend, addr, retaddr);
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
         do_unaligned_access:
             /* slow unaligned access (it spans two pages) */
@@ -194,9 +202,13 @@
     int index;
     index = (physaddr >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
     physaddr = (physaddr & TARGET_PAGE_MASK) + addr;
+    if (index > (IO_MEM_NOTDIRTY >> IO_MEM_SHIFT)
+            && !can_do_io(env)) {
+        cpu_io_recompile(env, retaddr);
+    }
 
-    env->mem_write_vaddr = addr;
-    env->mem_write_pc = (unsigned long)retaddr;
+    env->mem_io_vaddr = addr;
+    env->mem_io_pc = (unsigned long)retaddr;
 #if SHIFT <= 2
     io_mem_write[index][SHIFT](io_mem_opaque[index], physaddr, val);
 #else

Modified: trunk/target-alpha/cpu.h
===================================================================
--- trunk/target-alpha/cpu.h	2008-06-29 01:00:34 UTC (rev 4798)
+++ trunk/target-alpha/cpu.h	2008-06-29 01:03:05 UTC (rev 4799)
@@ -415,4 +415,6 @@
 void pal_init (CPUState *env);
 void call_pal (CPUState *env, int palcode);
 
+#define CPU_PC_FROM_TB(env, tb) env->pc = tb->pc
+
 #endif /* !defined (__CPU_ALPHA_H__) */

Modified: trunk/target-alpha/translate.c
===================================================================
--- trunk/target-alpha/translate.c	2008-06-29 01:00:34 UTC (rev 4798)
+++ trunk/target-alpha/translate.c	2008-06-29 01:03:05 UTC (rev 4799)
@@ -43,6 +43,19 @@
     uint32_t amask;
 };
 
+TCGv cpu_env;
+
+#include "gen-icount.h"
+
+void alpha_translate_init()
+{
+    static int done_init = 0;
+    if (done_init)
+        return;
+    cpu_env = tcg_global_reg_new(TCG_TYPE_PTR, TCG_AREG0, "env");
+    done_init = 1;
+}
+
 static always_inline void gen_op_nop (void)
 {
 #if defined(GENERATE_NOP)
@@ -1970,6 +1983,8 @@
     uint16_t *gen_opc_end;
     int j, lj = -1;
     int ret;
+    int num_insns;
+    int max_insns;
 
     pc_start = tb->pc;
     gen_opc_end = gen_opc_buf + OPC_MAX_SIZE;
@@ -1981,6 +1996,12 @@
     ctx.mem_idx = ((env->ps >> 3) & 3);
     ctx.pal_mode = env->ipr[IPR_EXC_ADDR] & 1;
 #endif
+    num_insns = 0;
+    max_insns = tb->cflags & CF_COUNT_MASK;
+    if (max_insns == 0)
+        max_insns = CF_COUNT_MASK;
+
+    gen_icount_start();
     for (ret = 0; ret == 0;) {
         if (env->nb_breakpoints > 0) {
             for(j = 0; j < env->nb_breakpoints; j++) {
@@ -1998,8 +2019,11 @@
                     gen_opc_instr_start[lj++] = 0;
                 gen_opc_pc[lj] = ctx.pc;
                 gen_opc_instr_start[lj] = 1;
+                gen_opc_icount[lj] = num_insns;
             }
         }
+        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
+            gen_io_start();
 #if defined ALPHA_DEBUG_DISAS
         insn_count++;
         if (logfile != NULL) {
@@ -2014,6 +2038,7 @@
             fprintf(logfile, "opcode %08x %d\n", insn, insn_count);
         }
 #endif
+        num_insns++;
         ctx.pc += 4;
         ret = translate_one(ctxp, insn);
         if (ret != 0)
@@ -2022,7 +2047,8 @@
          * generation
          */
         if (((ctx.pc & (TARGET_PAGE_SIZE - 1)) == 0) ||
-            (env->singlestep_enabled)) {
+            (env->singlestep_enabled) ||
+            num_insns >= max_insns) {
             break;
         }
 #if defined (DO_SINGLE_STEP)
@@ -2035,8 +2061,11 @@
 #if defined (DO_TB_FLUSH)
     gen_op_tb_flush();
 #endif
+    if (tb->cflags & CF_LAST_IO)
+        gen_io_end();
     /* Generate the return instruction */
     tcg_gen_exit_tb(0);
+    gen_icount_end(tb, num_insns);
     *gen_opc_ptr = INDEX_op_end;
     if (search_pc) {
         j = gen_opc_ptr - gen_opc_buf;
@@ -2045,6 +2074,7 @@
             gen_opc_instr_start[lj++] = 0;
     } else {
         tb->size = ctx.pc - pc_start;
+        tb->icount = num_insns;
     }
 #if defined ALPHA_DEBUG_DISAS
     if (loglevel & CPU_LOG_TB_CPU) {
@@ -2079,6 +2109,7 @@
     if (!env)
         return NULL;
     cpu_exec_init(env);
+    alpha_translate_init();
     tlb_flush(env, 1);
     /* XXX: should not be hardcoded */
     env->implver = IMPLVER_2106x;

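The remaining target-*/translate.c changes below all follow the same pattern as
the Alpha one above; a condensed sketch (control flow simplified, variable
names as used in the patch):

    /* Per-target translation loop after this patch (condensed).  */
    num_insns = 0;
    max_insns = tb->cflags & CF_COUNT_MASK;
    if (max_insns == 0)
        max_insns = CF_COUNT_MASK;            /* no explicit limit */
    gen_icount_start();                       /* emit the decrement-and-test stub */
    do {
        if (search_pc)
            gen_opc_icount[lj] = num_insns;   /* for cpu_restore_state() */
        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
            gen_io_start();                   /* last insn may do device I/O */
        /* ... translate one guest instruction ... */
        num_insns++;
    } while (/* not a jump, opcode buffer not full, same page, ... */
             num_insns < max_insns);
    if (tb->cflags & CF_LAST_IO)
        gen_io_end();
    gen_icount_end(tb, num_insns);            /* patch the count into the stub */
    tb->size = dc->pc - pc_start;
    tb->icount = num_insns;
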
Modified: trunk/target-arm/cpu.h
===================================================================
--- trunk/target-arm/cpu.h	2008-06-29 01:00:34 UTC (rev 4798)
+++ trunk/target-arm/cpu.h	2008-06-29 01:03:05 UTC (rev 4799)
@@ -417,6 +417,8 @@
 }
 #endif
 
+#define CPU_PC_FROM_TB(env, tb) env->regs[15] = tb->pc
+
 #include "cpu-all.h"
 
 #endif

Modified: trunk/target-arm/translate.c
===================================================================
--- trunk/target-arm/translate.c	2008-06-29 01:00:34 UTC (rev 4798)
+++ trunk/target-arm/translate.c	2008-06-29 01:03:05 UTC (rev 4799)
@@ -84,6 +84,9 @@
 static TCGv cpu_T[2];
 static TCGv cpu_F0s, cpu_F1s, cpu_F0d, cpu_F1d;
 
+#define ICOUNT_TEMP cpu_T[0]
+#include "gen-icount.h"
+
 /* initialize TCG globals.  */
 void arm_translate_init(void)
 {
@@ -8539,6 +8542,8 @@
     int j, lj;
     target_ulong pc_start;
     uint32_t next_page_start;
+    int num_insns;
+    int max_insns;
 
     /* generate intermediate code */
     num_temps = 0;
@@ -8575,6 +8580,12 @@
     cpu_M0 = tcg_temp_new(TCG_TYPE_I64);
     next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
     lj = -1;
+    num_insns = 0;
+    max_insns = tb->cflags & CF_COUNT_MASK;
+    if (max_insns == 0)
+        max_insns = CF_COUNT_MASK;
+
+    gen_icount_start();
     /* Reset the conditional execution bits immediately. This avoids
        complications trying to do it at the end of the block.  */
     if (env->condexec_bits)
@@ -8625,8 +8636,12 @@
             }
             gen_opc_pc[lj] = dc->pc;
             gen_opc_instr_start[lj] = 1;
+            gen_opc_icount[lj] = num_insns;
         }
 
+        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
+            gen_io_start();
+
         if (env->thumb) {
             disas_thumb_insn(env, dc);
             if (dc->condexec_mask) {
@@ -8659,10 +8674,21 @@
          * Otherwise the subsequent code could get translated several times.
          * Also stop translation when a page boundary is reached.  This
          * ensures prefetch aborts occur at the right place.  */
+        num_insns ++;
     } while (!dc->is_jmp && gen_opc_ptr < gen_opc_end &&
              !env->singlestep_enabled &&
-             dc->pc < next_page_start);
+             dc->pc < next_page_start &&
+             num_insns < max_insns);
 
+    if (tb->cflags & CF_LAST_IO) {
+        if (dc->condjmp) {
+            /* FIXME:  This can theoretically happen with self-modifying
+               code.  */
+            cpu_abort(env, "IO on conditional branch instruction");
+        }
+        gen_io_end();
+    }
+
     /* At this stage dc->condjmp will only be set when the skipped
        instruction was a conditional branch or trap, and the PC has
        already been written.  */
@@ -8726,7 +8752,9 @@
             dc->condjmp = 0;
         }
     }
+
 done_generating:
+    gen_icount_end(tb, num_insns);
     *gen_opc_ptr = INDEX_op_end;
 
 #ifdef DEBUG_DISAS
@@ -8744,6 +8772,7 @@
             gen_opc_instr_start[lj++] = 0;
     } else {
         tb->size = dc->pc - pc_start;
+        tb->icount = num_insns;
     }
     return 0;
 }

Modified: trunk/target-cris/cpu.h
===================================================================
--- trunk/target-cris/cpu.h	2008-06-29 01:00:34 UTC (rev 4798)
+++ trunk/target-cris/cpu.h	2008-06-29 01:03:05 UTC (rev 4799)
@@ -238,5 +238,7 @@
 #define SFR_RW_MM_TLB_LO   env->pregs[PR_SRS]][5
 #define SFR_RW_MM_TLB_HI   env->pregs[PR_SRS]][6
 
+#define CPU_PC_FROM_TB(env, tb) env->pc = tb->pc
+
 #include "cpu-all.h"
 #endif

Modified: trunk/target-cris/translate.c
===================================================================
--- trunk/target-cris/translate.c	2008-06-29 01:00:34 UTC (rev 4798)
+++ trunk/target-cris/translate.c	2008-06-29 01:03:05 UTC (rev 4799)
@@ -77,6 +77,8 @@
 TCGv env_btarget;
 TCGv env_pc;
 
+#include "gen-icount.h"
+
 /* This is the state at translation time.  */
 typedef struct DisasContext {
 	CPUState *env;
@@ -3032,6 +3034,8 @@
 	struct DisasContext *dc = &ctx;
 	uint32_t next_page_start;
 	target_ulong npc;
+        int num_insns;
+        int max_insns;
 
 	if (!logfile)
 		logfile = stderr;
@@ -3092,6 +3096,12 @@
 
 	next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
 	lj = -1;
+        num_insns = 0;
+        max_insns = tb->cflags & CF_COUNT_MASK;
+        if (max_insns == 0)
+            max_insns = CF_COUNT_MASK;
+
+        gen_icount_start();
 	do
 	{
 		check_breakpoint(env, dc);
@@ -3108,6 +3118,7 @@
 			else
 				gen_opc_pc[lj] = dc->pc;
 			gen_opc_instr_start[lj] = 1;
+                        gen_opc_icount[lj] = num_insns;
 		}
 
 		/* Pretty disas.  */
@@ -3116,6 +3127,8 @@
 			DIS(fprintf(logfile, "%x ", dc->pc));
 		}
 
+                if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
+                    gen_io_start();
 		dc->clear_x = 1;
 		if (unlikely(loglevel & CPU_LOG_TB_OP))
 			tcg_gen_debug_insn_start(dc->pc);
@@ -3125,6 +3138,7 @@
 		if (dc->clear_x)
 			cris_clear_x_flag(dc);
 
+                num_insns++;
 		/* Check for delayed branches here. If we do it before
 		   actually generating any host code, the simulator will just
 		   loop doing nothing on this program location.  */
@@ -3151,12 +3165,15 @@
 		if (!(tb->pc & 1) && env->singlestep_enabled)
 			break;
 	} while (!dc->is_jmp && gen_opc_ptr < gen_opc_end
-		 && (dc->pc < next_page_start));
+		 && (dc->pc < next_page_start)
+                 && num_insns < max_insns);
 
 	npc = dc->pc;
 	if (dc->jmp == JMP_DIRECT && !dc->delayed_branch)
 		npc = dc->jmp_pc;
 
+        if (tb->cflags & CF_LAST_IO)
+            gen_io_end();
 	/* Force an update if the per-tb cpu state has changed.  */
 	if (dc->is_jmp == DISAS_NEXT
 	    && (dc->cpustate_changed || !dc->flagx_known 
@@ -3194,6 +3211,7 @@
 				break;
 		}
 	}
+        gen_icount_end(tb, num_insns);
 	*gen_opc_ptr = INDEX_op_end;
 	if (search_pc) {
 		j = gen_opc_ptr - gen_opc_buf;
@@ -3202,6 +3220,7 @@
 			gen_opc_instr_start[lj++] = 0;
 	} else {
 		tb->size = dc->pc - pc_start;
+                tb->icount = num_insns;
 	}
 
 #ifdef DEBUG_DISAS

Modified: trunk/target-i386/cpu.h
===================================================================
--- trunk/target-i386/cpu.h	2008-06-29 01:00:34 UTC (rev 4798)
+++ trunk/target-i386/cpu.h	2008-06-29 01:03:05 UTC (rev 4799)
@@ -753,6 +753,8 @@
 }
 #endif
 
+#define CPU_PC_FROM_TB(env, tb) env->eip = tb->pc - tb->cs_base
+
 #include "cpu-all.h"
 
 #include "svm.h"

Modified: trunk/target-i386/translate.c
===================================================================
--- trunk/target-i386/translate.c	2008-06-29 01:00:34 UTC (rev 4798)
+++ trunk/target-i386/translate.c	2008-06-29 01:03:05 UTC (rev 4799)
@@ -65,6 +65,8 @@
 static TCGv cpu_tmp0, cpu_tmp1_i64, cpu_tmp2_i32, cpu_tmp3_i32, cpu_tmp4, cpu_ptr0, cpu_ptr1;
 static TCGv cpu_tmp5, cpu_tmp6;
 
+#include "gen-icount.h"
+
 #ifdef TARGET_X86_64
 static int x86_64_hregs;
 #endif
@@ -1203,6 +1205,8 @@
 
 static inline void gen_ins(DisasContext *s, int ot)
 {
+    if (use_icount)
+        gen_io_start();
     gen_string_movl_A0_EDI(s);
     /* Note: we must do this dummy write first to be restartable in
        case of page fault. */
@@ -1215,10 +1219,14 @@
     gen_op_st_T0_A0(ot + s->mem_index);
     gen_op_movl_T0_Dshift(ot);
     gen_op_add_reg_T0(s->aflag, R_EDI);
+    if (use_icount)
+        gen_io_end();
 }
 
 static inline void gen_outs(DisasContext *s, int ot)
 {
+    if (use_icount)
+        gen_io_start();
     gen_string_movl_A0_ESI(s);
     gen_op_ld_T0_A0(ot + s->mem_index);
 
@@ -1230,6 +1238,8 @@
 
     gen_op_movl_T0_Dshift(ot);
     gen_op_add_reg_T0(s->aflag, R_ESI);
+    if (use_icount)
+        gen_io_end();
 }
 
 /* same method as Valgrind : we generate jumps to current or next
@@ -5570,6 +5580,9 @@
             gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
         } else {
             gen_ins(s, ot);
+            if (use_icount) {
+                gen_jmp(s, s->pc - s->cs_base);
+            }
         }
         break;
     case 0x6e: /* outsS */
@@ -5586,6 +5599,9 @@
             gen_repz_outs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base);
         } else {
             gen_outs(s, ot);
+            if (use_icount) {
+                gen_jmp(s, s->pc - s->cs_base);
+            }
         }
         break;
 
@@ -5602,9 +5618,15 @@
         gen_op_movl_T0_im(val);
         gen_check_io(s, ot, pc_start - s->cs_base,
                      SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
+        if (use_icount)
+            gen_io_start();
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
         tcg_gen_helper_1_1(helper_in_func[ot], cpu_T[1], cpu_tmp2_i32);
         gen_op_mov_reg_T1(ot, R_EAX);
+        if (use_icount) {
+            gen_io_end();
+            gen_jmp(s, s->pc - s->cs_base);
+        }
         break;
     case 0xe6:
     case 0xe7:
@@ -5618,10 +5640,16 @@
                      svm_is_rep(prefixes));
         gen_op_mov_TN_reg(ot, 1, R_EAX);
 
+        if (use_icount)
+            gen_io_start();
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
         tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
         tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[1]);
         tcg_gen_helper_0_2(helper_out_func[ot], cpu_tmp2_i32, cpu_tmp3_i32);
+        if (use_icount) {
+            gen_io_end();
+            gen_jmp(s, s->pc - s->cs_base);
+        }
         break;
     case 0xec:
     case 0xed:
@@ -5633,9 +5661,15 @@
         gen_op_andl_T0_ffff();
         gen_check_io(s, ot, pc_start - s->cs_base,
                      SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes));
+        if (use_icount)
+            gen_io_start();
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
         tcg_gen_helper_1_1(helper_in_func[ot], cpu_T[1], cpu_tmp2_i32);
         gen_op_mov_reg_T1(ot, R_EAX);
+        if (use_icount) {
+            gen_io_end();
+            gen_jmp(s, s->pc - s->cs_base);
+        }
         break;
     case 0xee:
     case 0xef:
@@ -5649,10 +5683,16 @@
                      svm_is_rep(prefixes));
         gen_op_mov_TN_reg(ot, 1, R_EAX);
 
+        if (use_icount)
+            gen_io_start();
         tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
         tcg_gen_andi_i32(cpu_tmp2_i32, cpu_tmp2_i32, 0xffff);
         tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[1]);
         tcg_gen_helper_0_2(helper_out_func[ot], cpu_tmp2_i32, cpu_tmp3_i32);
+        if (use_icount) {
+            gen_io_end();
+            gen_jmp(s, s->pc - s->cs_base);
+        }
         break;
 
         /************************/
@@ -7109,6 +7149,8 @@
     uint64_t flags;
     target_ulong pc_start;
     target_ulong cs_base;
+    int num_insns;
+    int max_insns;
 
     /* generate intermediate code */
     pc_start = tb->pc;
@@ -7179,7 +7221,12 @@
     dc->is_jmp = DISAS_NEXT;
     pc_ptr = pc_start;
     lj = -1;
+    num_insns = 0;
+    max_insns = tb->cflags & CF_COUNT_MASK;
+    if (max_insns == 0)
+        max_insns = CF_COUNT_MASK;
 
+    gen_icount_start();
     for(;;) {
         if (env->nb_breakpoints > 0) {
             for(j = 0; j < env->nb_breakpoints; j++) {
@@ -7199,8 +7246,13 @@
             gen_opc_pc[lj] = pc_ptr;
             gen_opc_cc_op[lj] = dc->cc_op;
             gen_opc_instr_start[lj] = 1;
+            gen_opc_icount[lj] = num_insns;
         }
+        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
+            gen_io_start();
+
         pc_ptr = disas_insn(dc, pc_ptr);
+        num_insns++;
         /* stop translation if indicated */
         if (dc->is_jmp)
             break;
@@ -7210,20 +7262,23 @@
            the flag and abort the translation to give the irqs a
            chance to happen */
         if (dc->tf || dc->singlestep_enabled ||
-            (flags & HF_INHIBIT_IRQ_MASK) ||
-            (cflags & CF_SINGLE_INSN)) {
+            (flags & HF_INHIBIT_IRQ_MASK)) {
             gen_jmp_im(pc_ptr - dc->cs_base);
             gen_eob(dc);
             break;
         }
         /* if too long translation, stop generation too */
         if (gen_opc_ptr >= gen_opc_end ||
-            (pc_ptr - pc_start) >= (TARGET_PAGE_SIZE - 32)) {
+            (pc_ptr - pc_start) >= (TARGET_PAGE_SIZE - 32) ||
+            num_insns >= max_insns) {
             gen_jmp_im(pc_ptr - dc->cs_base);
             gen_eob(dc);
             break;
         }
     }
+    if (tb->cflags & CF_LAST_IO)
+        gen_io_end();
+    gen_icount_end(tb, num_insns);
     *gen_opc_ptr = INDEX_op_end;
     /* we don't forget to fill the last values */
     if (search_pc) {
@@ -7252,8 +7307,10 @@
     }
 #endif
 
-    if (!search_pc)
+    if (!search_pc) {
         tb->size = pc_ptr - pc_start;
+        tb->icount = num_insns;
+    }
     return 0;
 }
 

Modified: trunk/target-m68k/cpu.h
===================================================================
--- trunk/target-m68k/cpu.h	2008-06-29 01:00:34 UTC (rev 4798)
+++ trunk/target-m68k/cpu.h	2008-06-29 01:03:05 UTC (rev 4799)
@@ -235,6 +235,8 @@
 }
 #endif
 
+#define CPU_PC_FROM_TB(env, tb) env->pc = tb->pc
+
 #include "cpu-all.h"
 
 #endif

Modified: trunk/target-m68k/translate.c
===================================================================
--- trunk/target-m68k/translate.c	2008-06-29 01:00:34 UTC (rev 4798)
+++ trunk/target-m68k/translate.c	2008-06-29 01:03:05 UTC (rev 4799)
@@ -63,6 +63,8 @@
 /* Used to distinguish stores from bad addressing modes.  */
 static TCGv store_dummy;
 
+#include "gen-icount.h"
+
 void m68k_tcg_init(void)
 {
     char *p;
@@ -2919,6 +2921,8 @@
     target_ulong pc_start;
     int pc_offset;
     int last_cc_op;
+    int num_insns;
+    int max_insns;
 
     /* generate intermediate code */
     pc_start = tb->pc;
@@ -2937,6 +2941,12 @@
     dc->is_mem = 0;
     dc->mactmp = NULL_QREG;
     lj = -1;
+    num_insns = 0;
+    max_insns = tb->cflags & CF_COUNT_MASK;
+    if (max_insns == 0)
+        max_insns = CF_COUNT_MASK;
+
+    gen_icount_start();
     do {
         pc_offset = dc->pc - pc_start;
         gen_throws_exception = NULL;
@@ -2960,10 +2970,14 @@
             }
             gen_opc_pc[lj] = dc->pc;
             gen_opc_instr_start[lj] = 1;
+            gen_opc_icount[lj] = num_insns;
         }
+        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
+            gen_io_start();
         last_cc_op = dc->cc_op;
         dc->insn_pc = dc->pc;
 	disas_m68k_insn(env, dc);
+        num_insns++;
 
         /* Terminate the TB on memory ops if watchpoints are present.  */
         /* FIXME: This should be replaced by the deterministic execution
@@ -2972,8 +2986,11 @@
             break;
     } while (!dc->is_jmp && gen_opc_ptr < gen_opc_end &&
              !env->singlestep_enabled &&
-             (pc_offset) < (TARGET_PAGE_SIZE - 32));
+             (pc_offset) < (TARGET_PAGE_SIZE - 32) &&
+             num_insns < max_insns);
 
+    if (tb->cflags & CF_LAST_IO)
+        gen_io_end();
     if (__builtin_expect(env->singlestep_enabled, 0)) {
         /* Make sure the pc is updated, and raise a debug exception.  */
         if (!dc->is_jmp) {
@@ -2999,6 +3016,7 @@
             break;
         }
     }
+    gen_icount_end(tb, num_insns);
     *gen_opc_ptr = INDEX_op_end;
 
 #ifdef DEBUG_DISAS
@@ -3016,6 +3034,7 @@
             gen_opc_instr_start[lj++] = 0;
     } else {
         tb->size = dc->pc - pc_start;
+        tb->icount = num_insns;
     }
 
     //optimize_flags();

Modified: trunk/target-mips/cpu.h
===================================================================
--- trunk/target-mips/cpu.h	2008-06-29 01:00:34 UTC (rev 4798)
+++ trunk/target-mips/cpu.h	2008-06-29 01:03:05 UTC (rev 4799)
@@ -572,4 +572,10 @@
 uint32_t cpu_mips_get_clock (void);
 int cpu_mips_signal_handler(int host_signum, void *pinfo, void *puc);
 
+#define CPU_PC_FROM_TB(env, tb) do { \
+    env->active_tc.PC = tb->pc; \
+    env->hflags &= ~MIPS_HFLAG_BMASK; \
+    env->hflags |= tb->flags & MIPS_HFLAG_BMASK; \
+    } while (0)
+
 #endif /* !defined (__MIPS_CPU_H__) */

Modified: trunk/target-mips/translate.c
===================================================================
--- trunk/target-mips/translate.c	2008-06-29 01:00:34 UTC (rev 4798)
+++ trunk/target-mips/translate.c	2008-06-29 01:03:05 UTC (rev 4799)
@@ -428,6 +428,8 @@
 /* FPU TNs, global for now. */
 static TCGv fpu32_T[3], fpu64_T[3], fpu32h_T[3];
 
+#include "gen-icount.h"
+
 static inline void tcg_gen_helper_0_i(void *func, TCGv arg)
 {
     TCGv tmp = tcg_const_i32(arg);
@@ -3061,7 +3063,14 @@
     case 9:
         switch (sel) {
         case 0:
+            /* Mark as an IO operation because we read the time.  */
+            if (use_icount)
+                gen_io_start();
             tcg_gen_helper_1_0(do_mfc0_count, t0);
+            if (use_icount) {
+                gen_io_end();
+                ctx->bstate = BS_STOP;
+            }
             rn = "Count";
             break;
         /* 6,7 are implementation dependent */
@@ -3422,6 +3431,9 @@
     if (sel != 0)
         check_insn(env, ctx, ISA_MIPS32);
 
+    if (use_icount)
+        gen_io_start();
+
     switch (reg) {
     case 0:
         switch (sel) {
@@ -4004,6 +4016,11 @@
                 rn, reg, sel);
     }
 #endif
+    /* For simplicity assume that all writes can cause interrupts.  */
+    if (use_icount) {
+        gen_io_end();
+        ctx->bstate = BS_STOP;
+    }
     return;
 
 die:
@@ -4238,7 +4255,14 @@
     case 9:
         switch (sel) {
         case 0:
+            /* Mark as an IO operation because we read the time.  */
+            if (use_icount)
+                gen_io_start();
             tcg_gen_helper_1_0(do_mfc0_count, t0);
+            if (use_icount) {
+                gen_io_end();
+                ctx->bstate = BS_STOP;
+            }
             rn = "Count";
             break;
         /* 6,7 are implementation dependent */
@@ -4591,6 +4615,9 @@
     if (sel != 0)
         check_insn(env, ctx, ISA_MIPS64);
 
+    if (use_icount)
+        gen_io_start();
+
     switch (reg) {
     case 0:
         switch (sel) {
@@ -5161,6 +5188,11 @@
     }
 #endif
     tcg_temp_free(t0);
+    /* For simplicity assume that all writes can cause interrupts.  */
+    if (use_icount) {
+        gen_io_end();
+        ctx->bstate = BS_STOP;
+    }
     return;
 
 die:
@@ -7760,6 +7792,7 @@
         ctx->hflags &= ~MIPS_HFLAG_BMASK;
         ctx->bstate = BS_BRANCH;
         save_cpu_state(ctx, 0);
+        /* FIXME: Need to clear can_do_io.  */
         switch (hflags) {
         case MIPS_HFLAG_B:
             /* unconditional branch */
@@ -7807,6 +7840,8 @@
     target_ulong pc_start;
     uint16_t *gen_opc_end;
     int j, lj = -1;
+    int num_insns;
+    int max_insns;
 
     if (search_pc && loglevel)
         fprintf (logfile, "search pc %d\n", search_pc);
@@ -7826,6 +7861,11 @@
 #else
     ctx.mem_idx = ctx.hflags & MIPS_HFLAG_KSU;
 #endif
+    num_insns = 0;
+    num_insns = 0;
+    max_insns = tb->cflags & CF_COUNT_MASK;
+    if (max_insns == 0)
+        max_insns = CF_COUNT_MASK;
 #ifdef DEBUG_DISAS
     if (loglevel & CPU_LOG_TB_CPU) {
         fprintf(logfile, "------------------------------------------------\n");
@@ -7838,6 +7878,7 @@
         fprintf(logfile, "\ntb %p idx %d hflags %04x\n",
                 tb, ctx.mem_idx, ctx.hflags);
 #endif
+    gen_icount_start();
     while (ctx.bstate == BS_NONE) {
         if (env->nb_breakpoints > 0) {
             for(j = 0; j < env->nb_breakpoints; j++) {
@@ -7863,10 +7904,14 @@
             gen_opc_pc[lj] = ctx.pc;
             gen_opc_hflags[lj] = ctx.hflags & MIPS_HFLAG_BMASK;
             gen_opc_instr_start[lj] = 1;
+            gen_opc_icount[lj] = num_insns;
         }
+        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
+            gen_io_start();
         ctx.opcode = ldl_code(ctx.pc);
         decode_opc(env, &ctx);
         ctx.pc += 4;
+        num_insns++;
 
         if (env->singlestep_enabled)
             break;
@@ -7880,10 +7925,14 @@
         if (gen_opc_ptr >= gen_opc_end)
             break;
 
+        if (num_insns >= max_insns)
+            break;
 #if defined (MIPS_SINGLE_STEP)
         break;
 #endif
     }
+    if (tb->cflags & CF_LAST_IO)
+        gen_io_end();
     if (env->singlestep_enabled) {
         save_cpu_state(&ctx, ctx.bstate == BS_NONE);
         tcg_gen_helper_0_i(do_raise_exception, EXCP_DEBUG);
@@ -7907,6 +7956,7 @@
 	}
     }
 done_generating:
+    gen_icount_end(tb, num_insns);
     *gen_opc_ptr = INDEX_op_end;
     if (search_pc) {
         j = gen_opc_ptr - gen_opc_buf;
@@ -7915,6 +7965,7 @@
             gen_opc_instr_start[lj++] = 0;
     } else {
         tb->size = ctx.pc - pc_start;
+        tb->icount = num_insns;
     }
 #ifdef DEBUG_DISAS
 #if defined MIPS_DEBUG_DISAS

Modified: trunk/target-ppc/cpu.h
===================================================================
--- trunk/target-ppc/cpu.h	2008-06-29 01:00:34 UTC (rev 4798)
+++ trunk/target-ppc/cpu.h	2008-06-29 01:03:05 UTC (rev 4799)
@@ -697,6 +697,7 @@
 
 /*****************************************************************************/
 CPUPPCState *cpu_ppc_init (const char *cpu_model);
+void ppc_translate_init(void);
 int cpu_ppc_exec (CPUPPCState *s);
 void cpu_ppc_close (CPUPPCState *s);
 /* you can call this signal handler from your SIGBUS and SIGSEGV
@@ -833,6 +834,8 @@
 }
 #endif
 
+#define CPU_PC_FROM_TB(env, tb) env->nip = tb->pc
+
 #include "cpu-all.h"
 
 /*****************************************************************************/

Modified: trunk/target-ppc/helper.c
===================================================================
--- trunk/target-ppc/helper.c	2008-06-29 01:00:34 UTC (rev 4798)
+++ trunk/target-ppc/helper.c	2008-06-29 01:03:05 UTC (rev 4799)
@@ -2977,6 +2977,7 @@
     if (!env)
         return NULL;
     cpu_exec_init(env);
+    ppc_translate_init();
     env->cpu_model_str = cpu_model;
     cpu_ppc_register_internal(env, def);
     cpu_ppc_reset(env);

Modified: trunk/target-ppc/translate.c
===================================================================
--- trunk/target-ppc/translate.c	2008-06-29 01:00:34 UTC (rev 4798)
+++ trunk/target-ppc/translate.c	2008-06-29 01:03:05 UTC (rev 4799)
@@ -43,6 +43,19 @@
 /*****************************************************************************/
 /* Code translation helpers                                                  */
 
+static TCGv cpu_env;
+
+#include "gen-icount.h"
+
+void ppc_translate_init(void)
+{
+    int done_init = 0;
+    if (done_init)
+        return;
+    cpu_env = tcg_global_reg_new(TCG_TYPE_PTR, TCG_AREG0, "env");
+    done_init = 1;
+}
+
 #if defined(OPTIMIZE_FPRF_UPDATE)
 static uint16_t *gen_fprf_buf[OPC_BUF_SIZE];
 static uint16_t **gen_fprf_ptr;
@@ -6168,6 +6181,8 @@
     uint16_t *gen_opc_end;
     int supervisor, little_endian;
     int j, lj = -1;
+    int num_insns;
+    int max_insns;
 
     pc_start = tb->pc;
     gen_opc_end = gen_opc_buf + OPC_MAX_SIZE;
@@ -6211,6 +6226,12 @@
     /* Single step trace mode */
     msr_se = 1;
 #endif
+    num_insns = 0;
+    max_insns = tb->cflags & CF_COUNT_MASK;
+    if (max_insns == 0)
+        max_insns = CF_COUNT_MASK;
+
+    gen_icount_start();
     /* Set env in case of segfault during code fetch */
     while (ctx.exception == POWERPC_EXCP_NONE && gen_opc_ptr < gen_opc_end) {
         if (unlikely(env->nb_breakpoints > 0)) {
@@ -6230,6 +6251,7 @@
                     gen_opc_instr_start[lj++] = 0;
                 gen_opc_pc[lj] = ctx.nip;
                 gen_opc_instr_start[lj] = 1;
+                gen_opc_icount[lj] = num_insns;
             }
         }
 #if defined PPC_DEBUG_DISAS
@@ -6239,6 +6261,8 @@
                     ctx.nip, supervisor, (int)msr_ir);
         }
 #endif
+        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
+            gen_io_start();
         if (unlikely(little_endian)) {
             ctx.opcode = bswap32(ldl_code(ctx.nip));
         } else {
@@ -6253,6 +6277,7 @@
 #endif
         ctx.nip += 4;
         table = env->opcodes;
+        num_insns++;
         handler = table[opc1(ctx.opcode)];
         if (is_indirect_opcode(handler)) {
             table = ind_table(handler);
@@ -6306,7 +6331,8 @@
                      ctx.exception != POWERPC_EXCP_BRANCH)) {
             GEN_EXCP(ctxp, POWERPC_EXCP_TRACE, 0);
         } else if (unlikely(((ctx.nip & (TARGET_PAGE_SIZE - 1)) == 0) ||
-                            (env->singlestep_enabled))) {
+                            (env->singlestep_enabled) ||
+                            num_insns >= max_insns)) {
             /* if we reach a page boundary or are single stepping, stop
              * generation
              */
@@ -6316,6 +6342,8 @@
         break;
 #endif
     }
+    if (tb->cflags & CF_LAST_IO)
+        gen_io_end();
     if (ctx.exception == POWERPC_EXCP_NONE) {
         gen_goto_tb(&ctx, 0, ctx.nip);
     } else if (ctx.exception != POWERPC_EXCP_BRANCH) {
@@ -6326,6 +6354,7 @@
         /* Generate the return instruction */
         tcg_gen_exit_tb(0);
     }
+    gen_icount_end(tb, num_insns);
     *gen_opc_ptr = INDEX_op_end;
     if (unlikely(search_pc)) {
         j = gen_opc_ptr - gen_opc_buf;
@@ -6334,6 +6363,7 @@
             gen_opc_instr_start[lj++] = 0;
     } else {
         tb->size = ctx.nip - pc_start;
+        tb->icount = num_insns;
     }
 #if defined(DEBUG_DISAS)
     if (loglevel & CPU_LOG_TB_CPU) {

Modified: trunk/target-sh4/cpu.h
===================================================================
--- trunk/target-sh4/cpu.h	2008-06-29 01:00:34 UTC (rev 4798)
+++ trunk/target-sh4/cpu.h	2008-06-29 01:03:05 UTC (rev 4799)
@@ -152,6 +152,11 @@
 }
 #endif
 
+#define CPU_PC_FROM_TB(env, tb) do { \
+    env->pc = tb->pc; \
+    env->flags = tb->flags; \
+    } while (0)
+
 #include "cpu-all.h"
 
 /* Memory access type */

Modified: trunk/target-sh4/translate.c
===================================================================
--- trunk/target-sh4/translate.c	2008-06-29 01:00:34 UTC (rev 4798)
+++ trunk/target-sh4/translate.c	2008-06-29 01:03:05 UTC (rev 4799)
@@ -56,6 +56,19 @@
     BS_EXCP     = 3, /* We reached an exception condition */
 };
 
+static TCGv cpu_env;
+
+#include "gen-icount.h"
+
+void sh4_translate_init()
+{
+    static int done_init = 0;
+    if (done_init)
+        return;
+    cpu_env = tcg_global_reg_new(TCG_TYPE_PTR, TCG_AREG0, "env");
+    done_init = 1;
+}
+
 #ifdef CONFIG_USER_ONLY
 
 #define GEN_OP_LD(width, reg) \
@@ -143,6 +156,7 @@
     if (!env)
 	return NULL;
     cpu_exec_init(env);
+    sh4_translate_init();
     cpu_sh4_reset(env);
     tlb_flush(env, 1);
     return env;
@@ -1189,6 +1203,8 @@
     target_ulong pc_start;
     static uint16_t *gen_opc_end;
     int i, ii;
+    int num_insns;
+    int max_insns;
 
     pc_start = tb->pc;
     gen_opc_end = gen_opc_buf + OPC_MAX_SIZE;
@@ -1213,6 +1229,11 @@
 #endif
 
     ii = -1;
+    num_insns = 0;
+    max_insns = tb->cflags & CF_COUNT_MASK;
+    if (max_insns == 0)
+        max_insns = CF_COUNT_MASK;
+    gen_icount_start();
     while (ctx.bstate == BS_NONE && gen_opc_ptr < gen_opc_end) {
 	if (env->nb_breakpoints > 0) {
 	    for (i = 0; i < env->nb_breakpoints; i++) {
@@ -1235,22 +1256,30 @@
             gen_opc_pc[ii] = ctx.pc;
             gen_opc_hflags[ii] = ctx.flags;
             gen_opc_instr_start[ii] = 1;
+            gen_opc_icount[ii] = num_insns;
         }
+        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
+            gen_io_start();
 #if 0
 	fprintf(stderr, "Loading opcode at address 0x%08x\n", ctx.pc);
 	fflush(stderr);
 #endif
 	ctx.opcode = lduw_code(ctx.pc);
 	decode_opc(&ctx);
+        num_insns++;
 	ctx.pc += 2;
 	if ((ctx.pc & (TARGET_PAGE_SIZE - 1)) == 0)
 	    break;
 	if (env->singlestep_enabled)
 	    break;
+        if (num_insns >= max_insns)
+            break;
 #ifdef SH4_SINGLE_STEP
 	break;
 #endif
     }
+    if (tb->cflags & CF_LAST_IO)
+        gen_io_end();
     if (env->singlestep_enabled) {
         gen_op_debug();
     } else {
@@ -1274,6 +1303,7 @@
 	}
     }
 
+    gen_icount_end(tb, num_insns);
     *gen_opc_ptr = INDEX_op_end;
     if (search_pc) {
         i = gen_opc_ptr - gen_opc_buf;
@@ -1282,6 +1312,7 @@
             gen_opc_instr_start[ii++] = 0;
     } else {
         tb->size = ctx.pc - pc_start;
+        tb->icount = num_insns;
     }
 
 #ifdef DEBUG_DISAS

Modified: trunk/target-sparc/cpu.h
===================================================================
--- trunk/target-sparc/cpu.h	2008-06-29 01:00:34 UTC (rev 4798)
+++ trunk/target-sparc/cpu.h	2008-06-29 01:03:05 UTC (rev 4799)
@@ -437,6 +437,11 @@
 }
 #endif
 
+#define CPU_PC_FROM_TB(env, tb) do { \
+    env->pc = tb->pc; \
+    env->npc = tb->cs_base; \
+    } while(0)
+
 #include "cpu-all.h"
 
 #endif

Modified: trunk/target-sparc/translate.c
===================================================================
--- trunk/target-sparc/translate.c	2008-06-29 01:00:34 UTC (rev 4798)
+++ trunk/target-sparc/translate.c	2008-06-29 01:03:05 UTC (rev 4799)
@@ -48,6 +48,8 @@
 /* local register indexes (only used inside old micro ops) */
 static TCGv cpu_tmp0, cpu_tmp32, cpu_tmp64;
 
+#include "gen-icount.h"
+
 typedef struct DisasContext {
     target_ulong pc;    /* current Program Counter: integer or DYNAMIC_PC */
     target_ulong npc;   /* next PC: integer or DYNAMIC_PC or JUMP_PC */
@@ -4719,6 +4721,8 @@
     uint16_t *gen_opc_end;
     DisasContext dc1, *dc = &dc1;
     int j, lj = -1;
+    int num_insns;
+    int max_insns;
 
     memset(dc, 0, sizeof(DisasContext));
     dc->tb = tb;
@@ -4747,6 +4751,11 @@
     cpu_val = tcg_temp_local_new(TCG_TYPE_TL);
     cpu_addr = tcg_temp_local_new(TCG_TYPE_TL);
 
+    num_insns = 0;
+    max_insns = tb->cflags & CF_COUNT_MASK;
+    if (max_insns == 0)
+        max_insns = CF_COUNT_MASK;
+    gen_icount_start();
     do {
         if (env->nb_breakpoints > 0) {
             for(j = 0; j < env->nb_breakpoints; j++) {
@@ -4771,10 +4780,14 @@
                 gen_opc_pc[lj] = dc->pc;
                 gen_opc_npc[lj] = dc->npc;
                 gen_opc_instr_start[lj] = 1;
+                gen_opc_icount[lj] = num_insns;
             }
         }
+        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
+            gen_io_start();
         last_pc = dc->pc;
         disas_sparc_insn(dc);
+        num_insns++;
 
         if (dc->is_br)
             break;
@@ -4793,7 +4806,8 @@
             break;
         }
     } while ((gen_opc_ptr < gen_opc_end) &&
-             (dc->pc - pc_start) < (TARGET_PAGE_SIZE - 32));
+             (dc->pc - pc_start) < (TARGET_PAGE_SIZE - 32) &&
+             num_insns < max_insns);
 
  exit_gen_loop:
     tcg_temp_free(cpu_addr);
@@ -4802,6 +4816,8 @@
     tcg_temp_free(cpu_tmp64);
     tcg_temp_free(cpu_tmp32);
     tcg_temp_free(cpu_tmp0);
+    if (tb->cflags & CF_LAST_IO)
+        gen_io_end();
     if (!dc->is_br) {
         if (dc->pc != DYNAMIC_PC &&
             (dc->npc != DYNAMIC_PC && dc->npc != JUMP_PC)) {
@@ -4814,6 +4830,7 @@
             tcg_gen_exit_tb(0);
         }
     }
+    gen_icount_end(tb, num_insns);
     *gen_opc_ptr = INDEX_op_end;
     if (spc) {
         j = gen_opc_ptr - gen_opc_buf;
@@ -4829,6 +4846,7 @@
         gen_opc_jump_pc[1] = dc->jump_pc[1];
     } else {
         tb->size = last_pc + 4 - pc_start;
+        tb->icount = num_insns;
     }
 #ifdef DEBUG_DISAS
     if (loglevel & CPU_LOG_TB_IN_ASM) {

Modified: trunk/translate-all.c
===================================================================
--- trunk/translate-all.c	2008-06-29 01:00:34 UTC (rev 4798)
+++ trunk/translate-all.c	2008-06-29 01:03:05 UTC (rev 4799)
@@ -38,6 +38,7 @@
 TCGArg gen_opparam_buf[OPPARAM_BUF_SIZE];
 
 target_ulong gen_opc_pc[OPC_BUF_SIZE];
+uint16_t gen_opc_icount[OPC_BUF_SIZE];
 uint8_t gen_opc_instr_start[OPC_BUF_SIZE];
 #if defined(TARGET_I386)
 uint8_t gen_opc_cc_op[OPC_BUF_SIZE];
@@ -158,6 +159,13 @@
     if (gen_intermediate_code_pc(env, tb) < 0)
         return -1;
 
+    if (use_icount) {
+        /* Reset the cycle counter to the start of the block.  */
+        env->icount_decr.u16.low += tb->icount;
+        /* Clear the IO flag.  */
+        env->can_do_io = 0;
+    }
+
     /* find opc index corresponding to search_pc */
     tc_ptr = (unsigned long)tb->tc_ptr;
     if (searched_pc < tc_ptr)
@@ -177,6 +185,7 @@
     /* now find start of instruction before */
     while (gen_opc_instr_start[j] == 0)
         j--;
+    env->icount_decr.u16.low -= gen_opc_icount[j];
 
     gen_pc_load(env, tb, searched_pc, j, puc);
 

Modified: trunk/vl.c
===================================================================
--- trunk/vl.c	2008-06-29 01:00:34 UTC (rev 4798)
+++ trunk/vl.c	2008-06-29 01:03:05 UTC (rev 4799)
@@ -239,6 +239,14 @@
 static CPUState *cur_cpu;
 static CPUState *next_cpu;
 static int event_pending = 1;
+/* Conversion factor from emulated instrctions to virtual clock ticks.  */
+static int icount_time_shift;
+/* Arbitrarily pick 1MIPS as the minimum alowable speed.  */
+#define MAX_ICOUNT_SHIFT 10
+/* Compensate for varying guest execution speed.  */
+static int64_t qemu_icount_bias;
+QEMUTimer *icount_rt_timer;
+QEMUTimer *icount_vm_timer;
 
 #define TFR(expr) do { if ((expr) != -1) break; } while (errno == EINTR)
 
@@ -733,9 +741,22 @@
         return tv.tv_sec * 1000000000LL + (tv.tv_usec * 1000);
     }
 }
-
 #endif
 
+/* Return the virtual CPU time, based on the instruction counter.  */
+static int64_t cpu_get_icount(void)
+{
+    int64_t icount;
+    CPUState *env = cpu_single_env;
+    icount = qemu_icount;
+    if (env) {
+        if (!can_do_io(env))
+            fprintf(stderr, "Bad clock read\n");
+        icount -= (env->icount_decr.u16.low + env->icount_extra);
+    }
+    return qemu_icount_bias + (icount << icount_time_shift);
+}
+
 /***********************************************************/
 /* guest cycle counter */
 
@@ -747,6 +768,9 @@
 /* return the host CPU cycle counter and handle stop/restart */
 int64_t cpu_get_ticks(void)
 {
+    if (use_icount) {
+        return cpu_get_icount();
+    }
     if (!cpu_ticks_enabled) {
         return cpu_ticks_offset;
     } else {
@@ -878,6 +902,71 @@
 
 #endif /* _WIN32 */
 
+/* Correlation between real and virtual time is always going to be
+   farly approximate, so ignore small variation.
+   When the guest is idle real and virtual time will be aligned in
+   the IO wait loop.  */
+#define ICOUNT_WOBBLE (QEMU_TIMER_BASE / 10)
+
+static void icount_adjust(void)
+{
+    int64_t cur_time;
+    int64_t cur_icount;
+    int64_t delta;
+    static int64_t last_delta;
+    /* If the VM is not running, then do nothing.  */
+    if (!vm_running)
+        return;
+
+    cur_time = cpu_get_clock();
+    cur_icount = qemu_get_clock(vm_clock);
+    delta = cur_icount - cur_time;
+    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
+    if (delta > 0
+        && last_delta + ICOUNT_WOBBLE < delta * 2
+        && icount_time_shift > 0) {
+        /* The guest is getting too far ahead.  Slow time down.  */
+        icount_time_shift--;
+    }
+    if (delta < 0
+        && last_delta - ICOUNT_WOBBLE > delta * 2
+        && icount_time_shift < MAX_ICOUNT_SHIFT) {
+        /* The guest is getting too far behind.  Speed time up.  */
+        icount_time_shift++;
+    }
+    last_delta = delta;
+    qemu_icount_bias = cur_icount - (qemu_icount << icount_time_shift);
+}
+
+static void icount_adjust_rt(void * opaque)
+{
+    qemu_mod_timer(icount_rt_timer,
+                   qemu_get_clock(rt_clock) + 1000);
+    icount_adjust();
+}
+
+static void icount_adjust_vm(void * opaque)
+{
+    qemu_mod_timer(icount_vm_timer,
+                   qemu_get_clock(vm_clock) + QEMU_TIMER_BASE / 10);
+    icount_adjust();
+}
+
+static void init_icount_adjust(void)
+{
+    /* Have both realtime and virtual time triggers for speed adjustment.
+       The realtime trigger catches emulated time passing too slowly,
+       the virtual time trigger catches emulated time passing too fast.
+       Realtime triggers occur even when idle, so use them less frequently
+       than VM triggers.  */
+    icount_rt_timer = qemu_new_timer(rt_clock, icount_adjust_rt, NULL);
+    qemu_mod_timer(icount_rt_timer,
+                   qemu_get_clock(rt_clock) + 1000);
+    icount_vm_timer = qemu_new_timer(vm_clock, icount_adjust_vm, NULL);
+    qemu_mod_timer(icount_vm_timer,
+                   qemu_get_clock(vm_clock) + QEMU_TIMER_BASE / 10);
+}
+
 static struct qemu_alarm_timer alarm_timers[] = {
 #ifndef _WIN32
 #ifdef __linux__
@@ -914,6 +1003,7 @@
     int count = (sizeof(alarm_timers) / sizeof(*alarm_timers)) - 1;
     char *arg;
     char *name;
+    struct qemu_alarm_timer tmp;
 
     if (!strcmp(opt, "?")) {
         show_available_alarms();
@@ -925,8 +1015,6 @@
     /* Reorder the array */
     name = strtok(arg, ",");
     while (name) {
-        struct qemu_alarm_timer tmp;
-
         for (i = 0; i < count && alarm_timers[i].name; i++) {
             if (!strcmp(alarm_timers[i].name, name))
                 break;
@@ -954,7 +1042,7 @@
     free(arg);
 
     if (cur) {
-	/* Disable remaining timers */
+        /* Disable remaining timers */
         for (i = cur; i < count; i++)
             alarm_timers[i].name = NULL;
     } else {
@@ -1039,9 +1127,15 @@
     *pt = ts;
 
     /* Rearm if necessary  */
-    if ((alarm_timer->flags & ALARM_FLAG_EXPIRED) == 0 &&
-        pt == &active_timers[ts->clock->type])
-        qemu_rearm_alarm_timer(alarm_timer);
+    if (pt == &active_timers[ts->clock->type]) {
+        if ((alarm_timer->flags & ALARM_FLAG_EXPIRED) == 0) {
+            qemu_rearm_alarm_timer(alarm_timer);
+        }
+        /* Interrupt execution to force deadline recalculation.  */
+        if (use_icount && cpu_single_env) {
+            cpu_interrupt(cpu_single_env, CPU_INTERRUPT_EXIT);
+        }
+    }
 }
 
 int qemu_timer_pending(QEMUTimer *ts)
@@ -1085,7 +1179,11 @@
         return get_clock() / 1000000;
     default:
     case QEMU_TIMER_VIRTUAL:
-        return cpu_get_clock();
+        if (use_icount) {
+            return cpu_get_icount();
+        } else {
+            return cpu_get_clock();
+        }
     }
 }
 
@@ -1184,8 +1282,9 @@
     }
 #endif
     if (alarm_has_dynticks(alarm_timer) ||
-        qemu_timer_expired(active_timers[QEMU_TIMER_VIRTUAL],
-                           qemu_get_clock(vm_clock)) ||
+        (!use_icount &&
+            qemu_timer_expired(active_timers[QEMU_TIMER_VIRTUAL],
+                               qemu_get_clock(vm_clock))) ||
         qemu_timer_expired(active_timers[QEMU_TIMER_REALTIME],
                            qemu_get_clock(rt_clock))) {
 #ifdef _WIN32
@@ -1209,30 +1308,47 @@
     }
 }
 
-static uint64_t qemu_next_deadline(void)
+static int64_t qemu_next_deadline(void)
 {
-    int64_t nearest_delta_us = INT64_MAX;
-    int64_t vmdelta_us;
+    int64_t delta;
 
-    if (active_timers[QEMU_TIMER_REALTIME])
-        nearest_delta_us = (active_timers[QEMU_TIMER_REALTIME]->expire_time -
-                            qemu_get_clock(rt_clock))*1000;
-
     if (active_timers[QEMU_TIMER_VIRTUAL]) {
-        /* round up */
-        vmdelta_us = (active_timers[QEMU_TIMER_VIRTUAL]->expire_time -
-                      qemu_get_clock(vm_clock)+999)/1000;
-        if (vmdelta_us < nearest_delta_us)
-            nearest_delta_us = vmdelta_us;
+        delta = active_timers[QEMU_TIMER_VIRTUAL]->expire_time -
+                     qemu_get_clock(vm_clock);
+    } else {
+        /* To avoid problems with overflow limit this to 2^32.  */
+        delta = INT32_MAX;
     }
 
-    /* Avoid arming the timer to negative, zero, or too low values */
-    if (nearest_delta_us <= MIN_TIMER_REARM_US)
-        nearest_delta_us = MIN_TIMER_REARM_US;
+    if (delta < 0)
+        delta = 0;
 
-    return nearest_delta_us;
+    return delta;
 }
 
+static uint64_t qemu_next_deadline_dyntick(void)
+{
+    int64_t delta;
+    int64_t rtdelta;
+
+    if (use_icount)
+        delta = INT32_MAX;
+    else
+        delta = (qemu_next_deadline() + 999) / 1000;
+
+    if (active_timers[QEMU_TIMER_REALTIME]) {
+        rtdelta = (active_timers[QEMU_TIMER_REALTIME]->expire_time -
+                 qemu_get_clock(rt_clock))*1000;
+        if (rtdelta < delta)
+            delta = rtdelta;
+    }
+
+    if (delta < MIN_TIMER_REARM_US)
+        delta = MIN_TIMER_REARM_US;
+
+    return delta;
+}
+
 #ifndef _WIN32
 
 #if defined(__linux__)
@@ -1386,7 +1502,7 @@
                 !active_timers[QEMU_TIMER_VIRTUAL])
         return;
 
-    nearest_delta_us = qemu_next_deadline();
+    nearest_delta_us = qemu_next_deadline_dyntick();
 
     /* check whether a timer is already running */
     if (timer_gettime(host_timer, &timeout)) {
@@ -1513,7 +1629,7 @@
                 !active_timers[QEMU_TIMER_VIRTUAL])
         return;
 
-    nearest_delta_us = qemu_next_deadline();
+    nearest_delta_us = qemu_next_deadline_dyntick();
     nearest_delta_us /= 1000;
 
     timeKillEvent(data->timerId);
@@ -7068,10 +7184,33 @@
 #ifdef CONFIG_PROFILER
                 ti = profile_getclock();
 #endif
+                if (use_icount) {
+                    int64_t count;
+                    int decr;
+                    qemu_icount -= (env->icount_decr.u16.low + env->icount_extra);
+                    env->icount_decr.u16.low = 0;
+                    env->icount_extra = 0;
+                    count = qemu_next_deadline();
+                    count = (count + (1 << icount_time_shift) - 1)
+                            >> icount_time_shift;
+                    qemu_icount += count;
+                    decr = (count > 0xffff) ? 0xffff : count;
+                    count -= decr;
+                    env->icount_decr.u16.low = decr;
+                    env->icount_extra = count;
+                }
                 ret = cpu_exec(env);
 #ifdef CONFIG_PROFILER
                 qemu_time += profile_getclock() - ti;
 #endif
+                if (use_icount) {
+                    /* Fold pending instructions back into the
+                       instruction counter, and clear the interrupt flag.  */
+                    qemu_icount -= (env->icount_decr.u16.low
+                                    + env->icount_extra);
+                    env->icount_decr.u32 = 0;
+                    env->icount_extra = 0;
+                }
                 next_cpu = env->next_cpu ?: first_cpu;
                 if (event_pending && likely(ret != EXCP_DEBUG)) {
                     ret = EXCP_INTERRUPT;
@@ -7115,10 +7254,46 @@
             }
             /* If all cpus are halted then wait until the next IRQ */
             /* XXX: use timeout computed from timers */
-            if (ret == EXCP_HALTED)
-                timeout = 10;
-            else
+            if (ret == EXCP_HALTED) {
+                if (use_icount) {
+                    int64_t add;
+                    int64_t delta;
+                    /* Advance virtual time to the next event.  */
+                    if (use_icount == 1) {
+                        /* When not using an adaptive execution frequency
+                           we tend to get badly out of sync with real time,
+                           so just delay for a resonable amount of time.  */
+                        delta = 0;
+                    } else {
+                        delta = cpu_get_icount() - cpu_get_clock();
+                    }
+                    if (delta > 0) {
+                        /* If virtual time is ahead of real time then just
+                           wait for IO.  */
+                        timeout = (delta / 1000000) + 1;
+                    } else {
+                        /* Wait for either IO to occur or the next
+                           timer event.  */
+                        add = qemu_next_deadline();
+                        /* We advance the timer before checking for IO.
+                           Limit the amount we advance so that early IO
+                           activity won't get the guest too far ahead.  */
+                        if (add > 10000000)
+                            add = 10000000;
+                        delta += add;
+                        add = (add + (1 << icount_time_shift) - 1)
+                              >> icount_time_shift;
+                        qemu_icount += add;
+                        timeout = delta / 1000000;
+                        if (timeout < 0)
+                            timeout = 0;
+                    }
+                } else {
+                    timeout = 10;
+                }
+            } else {
                 timeout = 0;
+            }
         } else {
             timeout = 10;
         }
@@ -7270,6 +7445,8 @@
            "-clock          force the use of the given methods for timer alarm.\n"
            "                To see what timers are available use -clock ?\n"
            "-startdate      select initial date of the clock\n"
+           "-icount [N|auto]\n"
+           "                Enable virtual instruction counter with 2^N clock ticks per instructon\n"
            "\n"
            "During emulation, the following keys are useful:\n"
            "ctrl-alt-f      toggle full screen\n"
@@ -7374,6 +7551,7 @@
     QEMU_OPTION_clock,
     QEMU_OPTION_startdate,
     QEMU_OPTION_tb_size,
+    QEMU_OPTION_icount,
 };
 
 typedef struct QEMUOption {
@@ -7486,6 +7664,7 @@
     { "clock", HAS_ARG, QEMU_OPTION_clock },
     { "startdate", HAS_ARG, QEMU_OPTION_startdate },
     { "tb-size", HAS_ARG, QEMU_OPTION_tb_size },
+    { "icount", HAS_ARG, QEMU_OPTION_icount },
     { NULL },
 };
 
@@ -8310,6 +8489,14 @@
                 if (tb_size < 0)
                     tb_size = 0;
                 break;
+            case QEMU_OPTION_icount:
+                use_icount = 1;
+                if (strcmp(optarg, "auto") == 0) {
+                    icount_time_shift = -1;
+                } else {
+                    icount_time_shift = strtol(optarg, NULL, 0);
+                }
+                break;
             }
         }
     }
@@ -8395,6 +8582,13 @@
     init_timers();
     init_timer_alarm();
     qemu_aio_init();
+    if (use_icount && icount_time_shift < 0) {
+        use_icount = 2;
+        /* 125MIPS seems a reasonable initial guess at the guest speed.
+           It will be corrected fairly quickly anyway.  */
+        icount_time_shift = 3;
+        init_icount_adjust();
+    }
 
 #ifdef _WIN32
     socket_init();
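
As an aside for readers of the patch: before calling cpu_exec, the main loop
above converts the time until the next deadline into an instruction budget
(rounding up) and splits it between the 16-bit decrementer and icount_extra.
Below is a standalone sketch of just that arithmetic, with made-up sample
values rather than QEMU code:

/* Illustrative only -- not QEMU code.  Mirrors the budget calculation in
   the patched main loop: budget = ceil(deadline / 2^shift); up to 0xffff
   goes into the 16-bit decrementer, the remainder into icount_extra.  */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    int icount_time_shift = 3;      /* 2^3 ns per insn, i.e. ~125 MIPS */
    int64_t deadline = 1000000;     /* next timer deadline, in ns */

    /* Round up so execution never overshoots the deadline. */
    int64_t count = (deadline + (1 << icount_time_shift) - 1)
                    >> icount_time_shift;

    int decr = (count > 0xffff) ? 0xffff : count;
    int64_t extra = count - decr;

    printf("budget = %lld insns -> decr = %d, extra = %lld\n",
           (long long)count, decr, (long long)extra);
    return 0;
}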


* Re: [Qemu-devel] [4799] Add instruction counter.
       [not found] ` <6D074CEF-5086-4301-A19C-F1E76E6B313D@hotmail.com>
@ 2008-06-29  4:44   ` C.W. Betts
  0 siblings, 0 replies; 14+ messages in thread
From: C.W. Betts @ 2008-06-29  4:44 UTC (permalink / raw)
  To: qemu-devel



On Jun 28, 2008, at 7:03 PM, Paul Brook wrote:

> Revision: 4799
>         http://svn.sv.gnu.org/viewvc/?view=rev&root=qemu&revision=4799
> Author:   pbrook
> Date:     2008-06-29 01:03:05 +0000 (Sun, 29 Jun 2008)
>
> Log Message:
> -----------
> Add instruction counter.
>
> Modified Paths:
> --------------
>   trunk/cpu-all.h
>   trunk/cpu-defs.h
>   trunk/cpu-exec.c
>   trunk/exec-all.h
>   trunk/exec.c
>   trunk/hw/mips_timer.c
>   trunk/qemu-doc.texi
>   trunk/softmmu_template.h
>   trunk/target-alpha/cpu.h
>   trunk/target-alpha/translate.c
>   trunk/target-arm/cpu.h
>   trunk/target-arm/translate.c
>   trunk/target-cris/cpu.h
>   trunk/target-cris/translate.c
>   trunk/target-i386/cpu.h
>   trunk/target-i386/translate.c
>   trunk/target-m68k/cpu.h
>   trunk/target-m68k/translate.c
>   trunk/target-mips/cpu.h
>   trunk/target-mips/translate.c
>   trunk/target-ppc/cpu.h
>   trunk/target-ppc/helper.c
>   trunk/target-ppc/translate.c
>   trunk/target-sh4/cpu.h
>   trunk/target-sh4/translate.c
>   trunk/target-sparc/cpu.h
>   trunk/target-sparc/translate.c
>   trunk/translate-all.c
>   trunk/vl.c
> [snip]
Since there isn't a gen-icount.h file, this revision fails to build.   
Maybe a forgotten patch wasn't applied?



* Re: [Qemu-devel] [4799] Add instruction counter.
  2008-06-29  1:03 [Qemu-devel] [4799] Add instruction counter Paul Brook
       [not found] ` <6D074CEF-5086-4301-A19C-F1E76E6B313D@hotmail.com>
@ 2008-06-29  9:58 ` Laurent Desnogues
  2008-06-29 11:57   ` J. Mayer
  2008-06-29 18:44   ` Stuart Brady
  2008-06-29 12:37 ` [Qemu-devel] " Jan Kiszka
  2008-07-10 23:04 ` [Qemu-devel] " Robert Reif
  3 siblings, 2 replies; 14+ messages in thread
From: Laurent Desnogues @ 2008-06-29  9:58 UTC (permalink / raw)
  To: qemu-devel

On Sun, Jun 29, 2008 at 3:03 AM, Paul Brook <paul@nowt.org> wrote:
> Revision: 4799
>          http://svn.sv.gnu.org/viewvc/?view=rev&root=qemu&revision=4799
> Author:   pbrook
> Date:     2008-06-29 01:03:05 +0000 (Sun, 29 Jun 2008)

Some trivial comments:
  - missing gen_icount.h file
  - duplicate num_insns = 0; in
target_mips/translate.c/gen_intermediate_code_internal
  - typo in vl.c/help "clock ticks per instructon"
  - typo in qemu-doc.texi under icount: "cache heirachies"


Laurent


* Re: [Qemu-devel] [4799] Add instruction counter.
  2008-06-29  9:58 ` Laurent Desnogues
@ 2008-06-29 11:57   ` J. Mayer
  2008-06-29 12:28     ` Paul Brook
  2008-06-29 18:44   ` Stuart Brady
  1 sibling, 1 reply; 14+ messages in thread
From: J. Mayer @ 2008-06-29 11:57 UTC (permalink / raw)
  To: qemu-devel

On Sun, 2008-06-29 at 11:58 +0200, Laurent Desnogues wrote:
> On Sun, Jun 29, 2008 at 3:03 AM, Paul Brook <paul@nowt.org> wrote:
> > Revision: 4799
> >          http://svn.sv.gnu.org/viewvc/?view=rev&root=qemu&revision=4799
> > Author:   pbrook
> > Date:     2008-06-29 01:03:05 +0000 (Sun, 29 Jun 2008)
> 
> Some trivial comments:
>   - missing gen_icount.h file
>   - duplicate num_insns = 0; in
> target_mips/translate.c/gen_intermediate_code_internal
>   - typo in vl.c/help "clock ticks per instructon"
>   - typo in qemu-doc.texi under icount: "cache heirachies"

A few more comments, taking a quick look at the PowerPC target changes:
+void ppc_translate_init(void)
+{
+    int done_init = 0;
+    if (done_init)
+        return;
+    cpu_env = tcg_global_reg_new(TCG_TYPE_PTR, TCG_AREG0, "env");
+    done_init = 1;
+}

- done_init is on the stack, thus will never be 1.
- why isn't this variable / code shared with other targets (can see the
exact same code in Alpha target...) ?
- one detail: this function is more likely to be found in
translate_init.c

-- 
J. Mayer <l_indien@magic.fr>
Never organized


* Re: [Qemu-devel] [4799] Add instruction counter.
  2008-06-29 11:57   ` J. Mayer
@ 2008-06-29 12:28     ` Paul Brook
  2008-06-29 13:12       ` J. Mayer
  0 siblings, 1 reply; 14+ messages in thread
From: Paul Brook @ 2008-06-29 12:28 UTC (permalink / raw)
  To: qemu-devel; +Cc: J. Mayer

> +    int done_init = 0;
> +    if (done_init)
> +        return;
> +    cpu_env = tcg_global_reg_new(TCG_TYPE_PTR, TCG_AREG0, "env");
> +    done_init = 1;
> +}
>
> - done_init is on the stack, thus will never be 1.

Fixed.
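
The fix is presumably just making the guard static, as the sh4 variant in
the same commit already does. A minimal standalone illustration of the
difference, using made-up names rather than the real init code:

/* Standalone illustration (hypothetical names, not QEMU code): an
   automatic variable is re-created and re-zeroed on every call, so the
   guard never fires; a static one persists across calls. */
#include <stdio.h>

static void init_broken(void)
{
    int done_init = 0;              /* automatic: reset on every call */
    if (done_init)
        return;
    printf("broken guard: init body runs again\n");
    done_init = 1;                  /* discarded on return */
}

static void init_fixed(void)
{
    static int done_init = 0;       /* persists across calls */
    if (done_init)
        return;
    printf("fixed guard: init body runs once\n");
    done_init = 1;
}

int main(void)
{
    init_broken();
    init_broken();                  /* prints again */
    init_fixed();
    init_fixed();                   /* silent */
    return 0;
}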

> - why isn't this variable / code shared with other targets (can see the
> exact same code in Alpha target...) ?

Maybe. Most targets have many globals and cpu_env is the only one that's 
really target independent. alpha and ppc are only the same because they still 
haven't been converted to TCG.

Paul


* [Qemu-devel] Re: [4799] Add instruction counter.
  2008-06-29  1:03 [Qemu-devel] [4799] Add instruction counter Paul Brook
       [not found] ` <6D074CEF-5086-4301-A19C-F1E76E6B313D@hotmail.com>
  2008-06-29  9:58 ` Laurent Desnogues
@ 2008-06-29 12:37 ` Jan Kiszka
  2008-06-29 13:16   ` Paul Brook
  2008-07-10 23:04 ` [Qemu-devel] " Robert Reif
  3 siblings, 1 reply; 14+ messages in thread
From: Jan Kiszka @ 2008-06-29 12:37 UTC (permalink / raw)
  To: qemu-devel


Paul Brook wrote:
> Revision: 4799
>           http://svn.sv.gnu.org/viewvc/?view=rev&root=qemu&revision=4799
> Author:   pbrook
> Date:     2008-06-29 01:03:05 +0000 (Sun, 29 Jun 2008)
> 
> Log Message:
> -----------
> Add instruction counter.

...

> +/* in deterministic execution mode, instructions doing device I/Os
> +   must be at the end of the TB */
> +void cpu_io_recompile(CPUState *env, void *retaddr)
> +{
> +    TranslationBlock *tb;
> +    uint32_t n, cflags;
> +    target_ulong pc, cs_base;
> +    uint64_t flags;
> +
> +    tb = tb_find_pc((unsigned long)retaddr);
> +    if (!tb) {
> +        cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p", 
> +                  retaddr);
> +    }
> +    n = env->icount_decr.u16.low + tb->icount;
> +    cpu_restore_state(tb, env, (unsigned long)retaddr, NULL);
> +    /* Calculate how many instructions had been executed before the fault
> +       occured.  */
> +    n = n - env->icount_decr.u16.low;
> +    /* Generate a new TB ending on the I/O insn.  */
> +    n++;

At first glance, this function looked like it could serve as an
alternative to SSTEP_INTERNAL and provide the required roll-back on
watchpoint hit. But looking closer I realized that icount_decr is only
maintained if use_icount is set. But that appears to be optional and
default off. Now I'm wondering if I should simply rebase my roll-back
approach or if I should try to generalize yours in order to get the
debugging series working again.

I do not yet get why you were forced to go a different path for
cpu_io_recompile, ie. rebuilding and (re-executing?) the whole TB up to
the instruction that caused the IO access instead of just regenerating a
single-insn TB for that purpose. Is it more efficient? But if use_icount
is off by default, I guess this doesn't come for free either...

Jan




* Re: [Qemu-devel] [4799] Add instruction counter.
  2008-06-29 12:28     ` Paul Brook
@ 2008-06-29 13:12       ` J. Mayer
  0 siblings, 0 replies; 14+ messages in thread
From: J. Mayer @ 2008-06-29 13:12 UTC (permalink / raw)
  To: qemu-devel

On Sun, 2008-06-29 at 13:28 +0100, Paul Brook wrote:
> > +    int done_init = 0;
> > +    if (done_init)
> > +        return;
> > +    cpu_env = tcg_global_reg_new(TCG_TYPE_PTR, TCG_AREG0, "env");
> > +    done_init = 1;
> > +}
> >
> > - done_init is on the stack, thus will never be 1.
> 
> Fixed.

thanks

> > - why isn't this variable / code shared with other targets (can see the
> > exact same code in Alpha target...) ?
> 
> Maybe. Most targets have many globals and cpu_env is the only one that's 
> really target independent. alpha and ppc are only the same because they still 
> haven't been converted to TCG.

OK, I see.

-- 
J. Mayer <l_indien@magic.fr>
Never organized


* Re: [Qemu-devel] Re: [4799] Add instruction counter.
  2008-06-29 12:37 ` [Qemu-devel] " Jan Kiszka
@ 2008-06-29 13:16   ` Paul Brook
  2008-06-29 13:54     ` Jan Kiszka
  0 siblings, 1 reply; 14+ messages in thread
From: Paul Brook @ 2008-06-29 13:16 UTC (permalink / raw)
  To: qemu-devel; +Cc: Jan Kiszka

> On the first glance this function looked like it could serve as an
> alternative to SSTEP_INTERNAL and provide the required roll-back on
> watchpoint hit. But looking closer I realized that icount_decr is only
> maintained if use_icount is set.

I'm fairly sure limiting the length of the TB and actual instruction counting 
are largely independent. IIUC you only need the former.

> I do not yet get why you were forced to go a different path for
> cpu_io_recompile, ie. rebuilding and (re-executing?) the whole TB up to
> the instruction that caused the IO access instead of just regenerating a
> single-insn TB for that purpose. Is it more efficient?

Generating a single insn IO TB is a good idea for resolving the current fault. 
This is what the comment at the end of cpu_io_recompile is referring to.

Regenerating a truncated version of the original TB is important for 
subsequent executions of that block.  MMIO accesses occur 
frequently in loops when the guest is checking status bits or accessing a 
FIFO.  Recompiling the TB means that subsequent accesses complete with 
minimal overhead. If we didn't recompile then every access would incur a 
(very expensive) trap+unwind+singlestep.

The type of access can't be determined statically (it's a property of the 
address being accessed, not the instruction itself). However I'd expect that 
in practice most accesses consistently hit either RAM or MMIO, so 
recompiling when we see an IO access is a reasonable compromise.
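
To picture the loop case, here is a purely hypothetical toy (no QEMU types
or APIs; everything below is invented) showing why truncating the cached
block at the IO instruction pays off on later executions:

/* Toy model: a cached "block" of four ops where op 2 turns out to be an
   MMIO access.  The first execution pays for a recompile; afterwards the
   truncated block ends on the IO op and runs without trapping. */
#include <stdbool.h>
#include <stdio.h>

struct block {
    int len;            /* number of ops compiled into the block */
    bool last_is_io;    /* block known to end on an MMIO access */
};

static bool op_is_mmio(int op)
{
    return op == 2;     /* fixed for the toy */
}

static void run_block(struct block *b)
{
    for (int op = 0; op < b->len; op++) {
        if (op_is_mmio(op) && !(b->last_is_io && op == b->len - 1)) {
            /* Mid-block IO: expensive unwind, then recompile so the IO
               op becomes the last op of the cached block. */
            printf("trap: recompiling block to end at op %d\n", op);
            b->len = op + 1;
            b->last_is_io = true;
            return;
        }
    }
    printf("block of %d ops ran; IO handled on the slow path at the end\n",
           b->len);
}

int main(void)
{
    struct block b = { 4, false };
    run_block(&b);      /* first pass: mid-block IO forces a recompile */
    run_block(&b);      /* later passes: no trap */
    run_block(&b);
    return 0;
}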

> But if use_icount is off by default, I guess this doesn't come for free
> either... 

See above. cpu_io_recompile is used to get precise delivery of interrupts. 
This is required for deterministic timing (i.e. use_icount) but does not 
depend on it.

Paul


* Re: [Qemu-devel] Re: [4799] Add instruction counter.
  2008-06-29 13:16   ` Paul Brook
@ 2008-06-29 13:54     ` Jan Kiszka
  2008-06-29 14:31       ` Paul Brook
  0 siblings, 1 reply; 14+ messages in thread
From: Jan Kiszka @ 2008-06-29 13:54 UTC (permalink / raw)
  To: Paul Brook; +Cc: qemu-devel


Paul Brook wrote:
>> At first glance, this function looked like it could serve as an
>> alternative to SSTEP_INTERNAL and provide the required roll-back on
>> watchpoint hit. But looking closer I realized that icount_decr is only
>> maintained if use_icount is set.
> 
> I'm fairly sure limiting the length of the TB and actual instruction counting 
> are largely independent. IIUC you only need the former.

But to calculate the former, you need the latter again. I wonder if it
wouldn't be more efficient and flexible to specify a terminating PC
instead of an instruction count. Wouldn't that make cpu_io_recompile
independent of icount_decr and, thus, use_icount?

> 
>> I do not yet get why you were forced to go a different path for
>> cpu_io_recompile, ie. rebuilding and (re-executing?) the whole TB up to
>> the instruction that caused the IO access instead of just regenerating a
>> single-insn TB for that purpose. Is it more efficient?
> 
> Generating a single insn IO TB is a good idea for resolving the current fault. 
> This is what the comment at the end of cpu_io_recompile is referring to.
> 
> Regenerating a truncated version of the original TB is important for 
> subsequent executions of that block.  MMIO accesses occur 
> frequently in loops when the guest is checking status bits or accessing a 
> FIFO.  Recompiling the TB means that subsequent accesses complete with 
> minimal overhead. If we didn't recompile then every access would incur a 
> (very expensive) trap+unwind+singlestep.
> 
> The type of access can't be determined statically (it's a property of the 
> address being accessed, not the instruction itself). However I'd expect that 
> in practice most accesses consistently hit either RAM or MMIO, so 
> recompiling when we see an IO access is a reasonable compromise.

OK, understood.

> 
>> But if use_icount is off by default, I guess this doesn't come for free
>> either... 
> 
> See above. cpu_io_recompile is used to get precise delivery of interrupts. 
> This is required for deterministic timing (i.e. use_icount) but does not 
> depend on it.

Watchpoints, specifically guest-injected ones, require deterministic
exception delivery as well. So I would like to reuse existing
infrastructure that already solved a similar problem.

Jan




* Re: [Qemu-devel] Re: [4799] Add instruction counter.
  2008-06-29 13:54     ` Jan Kiszka
@ 2008-06-29 14:31       ` Paul Brook
  0 siblings, 0 replies; 14+ messages in thread
From: Paul Brook @ 2008-06-29 14:31 UTC (permalink / raw)
  To: qemu-devel; +Cc: Jan Kiszka

On Sunday 29 June 2008, Jan Kiszka wrote:
> Paul Brook wrote:
> >> At first glance, this function looked like it could serve as an
> >> alternative to SSTEP_INTERNAL and provide the required roll-back on
> >> watchpoint hit. But looking closer I realized that icount_decr is only
> >> maintained if use_icount is set.
> >
> > I'm fairly sure limiting the length of the TB and actual instruction
> > counting are largely independent. IIUC you only need the former.
>
> But to calculate the former, you need the latter again. 

Not really. You only need to know how far through the TB you got before the 
trap occurred.

> I wonder if it 
> wouldn't be more efficient and flexible to specify a terminating PC
> instead of an instruction count. Wouldn't that make cpu_io_recompile
> independent of icount_decr and, thus, use_icount?

Ah, I see what you're getting at.  cpu_restore_state modifies icount_decr to 
indicate how far through the TB we got.  That can be independent of 
use_icount.
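
In other words, the whole block appears to be charged to the decrementer
when it is entered, and on a fault the instructions that never ran are
refunded using the per-insn counts recorded at translation time
(gen_opc_icount). A standalone sketch with invented numbers, not the real
QEMU code:

/* Sketch of the refund arithmetic described above (invented values). */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint16_t gen_opc_icount[] = { 0, 1, 2, 3, 4 };  /* 5-insn block */
    int tb_icount = 5;              /* instructions in the whole block */
    int j = 2;                      /* faulting insn found by host-PC search */
    uint16_t decr_low = 100;        /* decrementer after the up-front charge */

    decr_low += tb_icount;          /* rewind to the start of the block */
    decr_low -= gen_opc_icount[j];  /* re-charge only what completed */

    printf("refunded %d of %d instructions, decrementer now %u\n",
           tb_icount - gen_opc_icount[j], tb_icount, decr_low);
    return 0;
}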

A terminating PC is much less useful. In general the only instruction you 
really know the location of is the one you're currently at.

Paul


* Re: [Qemu-devel] [4799] Add instruction counter.
  2008-06-29  9:58 ` Laurent Desnogues
  2008-06-29 11:57   ` J. Mayer
@ 2008-06-29 18:44   ` Stuart Brady
  1 sibling, 0 replies; 14+ messages in thread
From: Stuart Brady @ 2008-06-29 18:44 UTC (permalink / raw)
  To: qemu-devel

On Sun, Jun 29, 2008 at 11:58:09AM +0200, Laurent Desnogues wrote:
>   - typo in vl.c/help "clock ticks per instructon"
>   - typo in qemu-doc.texi under icount: "cache heirachies"

A few more:

cpu-exec.c:
  exired -> expired
exec-all.h:
  performaed -> performed
exec.c:
  Precice -> Precise
  temorary -> temporary
  mittle -> middle
  occured -> occurred
  preceeding -> preceding
  failuting -> faulting?
gen-icount.h:
  Helpewrs -> Helpers
  genration -> generation
target-mips/translate.c:
  simplicitly -> simplicity (x2)
vl.c:
  instrctions -> instructions
  alowable -> allowable
  farly -> fairly
  resonable -> reasonable

... and while we're at it:

target-arm/translate.c:
  prefech -> prefetch
target-cris/translate.c:
  genereating -> generating
target-m68k/translate.c:
  replacd -> replaced

Cheers,
-- 
Stuart Brady


* Re: [Qemu-devel] [4799] Add instruction counter.
  2008-06-29  1:03 [Qemu-devel] [4799] Add instruction counter Paul Brook
                   ` (2 preceding siblings ...)
  2008-06-29 12:37 ` [Qemu-devel] " Jan Kiszka
@ 2008-07-10 23:04 ` Robert Reif
  2008-07-11 16:42   ` Blue Swirl
  2008-07-11 16:59   ` Julian Seward
  3 siblings, 2 replies; 14+ messages in thread
From: Robert Reif @ 2008-07-10 23:04 UTC (permalink / raw)
  To: qemu-devel; +Cc: Paul Brook

Paul Brook wrote:
> Revision: 4799
>           http://svn.sv.gnu.org/viewvc/?view=rev&root=qemu&revision=4799
> Author:   pbrook
> Date:     2008-06-29 01:03:05 +0000 (Sun, 29 Jun 2008)
>
> Log Message:
> -----------
> Add instruction counter.
>
>   
This patch breaks sparc32 for me.

A bisection shows:

4798   works
4799   doesn't build
4800   doesn't work (crashes with illegal instruction)

running qemu-g in sparc-test produces:

./qemu-g: line 4:  9859 Illegal instruction     qemu-system-sparc 
-kernel vmlinux-2.6.11+tcx -initrd linux.img -append "root=/dev/ram"

running under valgrind produces this:

valgrind --trace-children=yes qemu-system-sparc -kernel 
vmlinux-2.6.11+tcx -initrd linux.img -append "root=/dev/ram"
==9861== Memcheck, a memory error detector.
==9861== Copyright (C) 2002-2008, and GNU GPL'd, by Julian Seward et al.
==9861== Using LibVEX rev 1806, a library for dynamic binary translation.
==9861== Copyright (C) 2004-2008, and GNU GPL'd, by OpenWorks LLP.
==9861== Using valgrind-3.4.0.SVN, a dynamic binary instrumentation 
framework.
==9861== Copyright (C) 2000-2008, and GNU GPL'd, by Julian Seward et al.
==9861== For more details, rerun with: -v
==9861==
==9861== Warning: set address range perms: large range 144179200 (undefined)
vex x86->IR: unhandled instruction bytes: 0xFF 0xFF 0xFF 0xFF
vex x86->IR: unhandled instruction bytes: 0xFF 0xFF 0xFF 0xFF
==9861== valgrind: Unrecognised instruction at address 0x114BA050.
==9861== Your program just tried to execute an instruction that Valgrind
==9861== did not recognise.  There are two possible reasons for this.
==9861== 1. Your program has a bug and erroneously jumped to a non-code
==9861==    location.  If you are running Memcheck and you just saw a
==9861==    warning about a bad jump, it's probably your program's fault.
==9861== 2. The instruction is legitimate but Valgrind doesn't handle it,
==9861==    i.e. it's Valgrind's fault.  If you think this is the case or
==9861==    you are not sure, please let us know and we'll try to fix it.
==9861== Either way, Valgrind will now raise a SIGILL signal which will
==9861== probably kill your program.
==9861==
==9861== Process terminating with default action of signal 4 (SIGILL)
==9861==  Illegal opcode at address 0x114BA050
==9861==    at 0x114BA050: ???
==9861==

This is on a RedHat 9 system:

uname -a
Linux dell 2.4.20-31.9smp #1 SMP Tue Apr 13 17:40:10 EDT 2004 i686 i686 
i386 GNU/Linux

gcc --version
gcc (GCC) 3.2.2 20030222 (Red Hat Linux 3.2.2-5)


* Re: [Qemu-devel] [4799] Add instruction counter.
  2008-07-10 23:04 ` [Qemu-devel] " Robert Reif
@ 2008-07-11 16:42   ` Blue Swirl
  2008-07-11 16:59   ` Julian Seward
  1 sibling, 0 replies; 14+ messages in thread
From: Blue Swirl @ 2008-07-11 16:42 UTC (permalink / raw)
  To: qemu-devel

On 7/11/08, Robert Reif <reif@earthlink.net> wrote:
> Paul Brook wrote:
>
> > Revision: 4799
> >
> http://svn.sv.gnu.org/viewvc/?view=rev&root=qemu&revision=4799
> > Author:   pbrook
> > Date:     2008-06-29 01:03:05 +0000 (Sun, 29 Jun 2008)
> >
> > Log Message:
> > -----------
> > Add instruction counter.
> >
> >
> >
>  This patch breaks sparc32 for me.
>
>  A bisection shows:
>
>  4798   works
>  4799   doesn't build
>  4800   doesn't work (crashes with illegal instruction)
>
>  running qemu-g in sparc-test produces:
>
>  ./qemu-g: line 4:  9859 Illegal instruction     qemu-system-sparc -kernel
> vmlinux-2.6.11+tcx -initrd linux.img -append "root=/dev/ram"
>
>  running under valgrind produces this:
>
>  valgrind --trace-children=yes qemu-system-sparc -kernel vmlinux-2.6.11+tcx
> -initrd linux.img -append "root=/dev/ram"
>  ==9861== Memcheck, a memory error detector.
>  ==9861== Copyright (C) 2002-2008, and GNU GPL'd, by Julian Seward et al.
>  ==9861== Using LibVEX rev 1806, a library for dynamic binary translation.
>  ==9861== Copyright (C) 2004-2008, and GNU GPL'd, by OpenWorks LLP.
>  ==9861== Using valgrind-3.4.0.SVN, a dynamic binary instrumentation
> framework.
>  ==9861== Copyright (C) 2000-2008, and GNU GPL'd, by Julian Seward et al.
>  ==9861== For more details, rerun with: -v
>  ==9861==
>  ==9861== Warning: set address range perms: large range 144179200
> (undefined)
>  vex x86->IR: unhandled instruction bytes: 0xFF 0xFF 0xFF 0xFF
>  vex x86->IR: unhandled instruction bytes: 0xFF 0xFF 0xFF 0xFF
>  ==9861== valgrind: Unrecognised instruction at address 0x114BA050.
>  ==9861== Your program just tried to execute an instruction that Valgrind
>  ==9861== did not recognise.  There are two possible reasons for this.
>  ==9861== 1. Your program has a bug and erroneously jumped to a non-code
>  ==9861==    location.  If you are running Memcheck and you just saw a
>  ==9861==    warning about a bad jump, it's probably your program's fault.
>  ==9861== 2. The instruction is legitimate but Valgrind doesn't handle it,
>  ==9861==    i.e. it's Valgrind's fault.  If you think this is the case or
>  ==9861==    you are not sure, please let us know and we'll try to fix it.
>  ==9861== Either way, Valgrind will now raise a SIGILL signal which will
>  ==9861== probably kill your program.
>  ==9861==
>  ==9861== Process terminating with default action of signal 4 (SIGILL)
>  ==9861==  Illegal opcode at address 0x114BA050
>  ==9861==    at 0x114BA050: ???
>  ==9861==
>
>  This is on a RedHat 9 system:
>
>  uname -a
>  Linux dell 2.4.20-31.9smp #1 SMP Tue Apr 13 17:40:10 EDT 2004 i686 i686
> i386 GNU/Linux
>
>  gcc --version
>  gcc (GCC) 3.2.2 20030222 (Red Hat Linux 3.2.2-5)

I can't reproduce the problem, with valgrind or without. I also tried
recompiling with different gccs but still everything works fine. I
have retired all real i386 machines so my tests were run inside an
i386 chroot on an amd64 system; maybe that causes problems. Does
anyone else see this error?


* Re: [Qemu-devel] [4799] Add instruction counter.
  2008-07-10 23:04 ` [Qemu-devel] " Robert Reif
  2008-07-11 16:42   ` Blue Swirl
@ 2008-07-11 16:59   ` Julian Seward
  1 sibling, 0 replies; 14+ messages in thread
From: Julian Seward @ 2008-07-11 16:59 UTC (permalink / raw)
  To: qemu-devel; +Cc: Paul Brook, Robert Reif


On Friday 11 July 2008 01:04, Robert Reif wrote:
> running under valgrind produces this:
>
> valgrind --trace-children=yes qemu-system-sparc -kernel
> vmlinux-2.6.11+tcx -initrd linux.img -append "root=/dev/ram"

Minor side-note.  When running qemu on valgrind, you should give
valgrind the --smc-check=all flag, so that it correctly handles
the situation where qemu generates new code into an area where
previously there was other code (iow, after the TB is flushed).
Otherwise all hell breaks loose (in Valgrind-land).
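
For example, the reproduction command from earlier in the thread would
become something like:

  valgrind --smc-check=all --trace-children=yes qemu-system-sparc \
      -kernel vmlinux-2.6.11+tcx -initrd linux.img -append "root=/dev/ram"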

I have the impression that this became much more important after
Paul Brook's recent changes to support a deterministic time base,
since those cause qemu to much more frequently retranslate blocks
that contain I/O instructions.  (Or something like that ...)

J


end of thread, other threads:[~2008-07-11 17:07 UTC | newest]

Thread overview: 14+ messages
2008-06-29  1:03 [Qemu-devel] [4799] Add instruction counter Paul Brook
     [not found] ` <6D074CEF-5086-4301-A19C-F1E76E6B313D@hotmail.com>
2008-06-29  4:44   ` C.W. Betts
2008-06-29  9:58 ` Laurent Desnogues
2008-06-29 11:57   ` J. Mayer
2008-06-29 12:28     ` Paul Brook
2008-06-29 13:12       ` J. Mayer
2008-06-29 18:44   ` Stuart Brady
2008-06-29 12:37 ` [Qemu-devel] " Jan Kiszka
2008-06-29 13:16   ` Paul Brook
2008-06-29 13:54     ` Jan Kiszka
2008-06-29 14:31       ` Paul Brook
2008-07-10 23:04 ` [Qemu-devel] " Robert Reif
2008-07-11 16:42   ` Blue Swirl
2008-07-11 16:59   ` Julian Seward
