qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Sami Kiminki <sami.kiminki@tkk.fi>
To: Laurent Desnogues <laurent.desnogues@gmail.com>
Cc: qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [PATCH] Instruction counting instrumentation for ARM, 2nd version & 2nd try
Date: Mon, 15 Jun 2009 16:31:59 +0300	[thread overview]
Message-ID: <1245072719.28612.34.camel@dis> (raw)
In-Reply-To: <761ea48b0906150531j6cf88fa2v5652322efcc7469c@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 275 bytes --]

Apparently I succeeded in forgetting a switch when diffing and 2 files
were missing. Attached is a new attempt.

- Sami

On Mon, 2009-06-15 at 14:31 +0200, Laurent Desnogues wrote:
> Hi Sami,
> 
> instruction.[ch] are not in your patch it seems :)
> 
> Cheers,
> 
> Laurent


[-- Attachment #2: qemu-0.10.4-instrcount.patch --]
[-- Type: text/x-patch, Size: 97798 bytes --]

diff -Nurp qemu-0.10.4/Makefile qemu-0.10.4-instrcount/Makefile
--- qemu-0.10.4/Makefile	2009-05-12 17:56:04.000000000 +0300
+++ qemu-0.10.4-instrcount/Makefile	2009-06-15 16:12:52.453488382 +0300
@@ -179,6 +179,10 @@ libqemu_common.a: $(OBJS)
 # USER_OBJS is code used by qemu userspace emulation
 USER_OBJS=cutils.o  cache-utils.o
 
+# instruction count instrumentation
+USER_OBJS+=instrumentation.o
+
+
 libqemu_user.a: $(USER_OBJS)
 
 ######################################################################
diff -Nurp qemu-0.10.4/Makefile.target qemu-0.10.4-instrcount/Makefile.target
--- qemu-0.10.4/Makefile.target	2009-05-12 17:56:04.000000000 +0300
+++ qemu-0.10.4-instrcount/Makefile.target	2009-06-15 16:12:52.450155174 +0300
@@ -208,8 +208,10 @@ ifeq ($(findstring s390, $(TARGET_ARCH) 
 LIBOBJS+=s390-dis.o
 endif
 
-# libqemu
+# instrumentation support
+LIBOBJS+=instrumentation.o
 
+# libqemu
 libqemu.a: $(LIBOBJS)
 
 translate.o: translate.c cpu.h
diff -Nurp qemu-0.10.4/instrumentation.c qemu-0.10.4-instrcount/instrumentation.c
--- qemu-0.10.4/instrumentation.c	1970-01-01 02:00:00.000000000 +0200
+++ qemu-0.10.4-instrcount/instrumentation.c	2009-06-15 16:12:52.453488382 +0300
@@ -0,0 +1,37 @@
+/*
+ * instrumentation.c
+ *
+ *  Created on: May 14, 2009
+ *      Author: ttoyry
+ */
+
+#include "instrumentation.h"
+unsigned int instrumentation_count_instructions = 0;
+unsigned int instrumentation_count_instructions_log = 0;
+
+/*
+static instr_counter_offsets instr_offsets;
+
+static inline void instr_count_inc_init(uint32_t offset,  int instr)
+{
+    if (!instrumentation_count_instructions) return;
+    instr_offsets.cpustate_offset = offset;
+    TCGv tmp = new_tmp();
+    tcg_gen_ld_i32(tmp, cpu_env, instr_offsets.cpustate_offset + sizeof(uint32_t) * instr);
+    instr_offsets.tcg_offset[0] = gen_opparam_ptr - 1;
+    tcg_gen_addi_i32(tmp, tmp, 1);
+    tcg_gen_st_i32(tmp, cpu_env, instr_offsets.cpustate_offset + sizeof(uint32_t) * instr);
+    instr_offsets.tcg_offset[1] = gen_opparam_ptr - 1;
+    dead_tmp(tmp);
+}
+
+ Increment instruction counter
+static inline void instr_count_inc(int instr)
+{
+    if (!instrumentation_count_instructions) return;
+    *(instr_offsets.tcg_offset[0]) = instr_offsets.cpustate_offset + sizeof(uint32_t) * instr;
+    *(instr_offsets.tcg_offset[1]) = instr_offsets.cpustate_offset + sizeof(uint32_t) * instr;
+}
+*/
+
+
diff -Nurp qemu-0.10.4/instrumentation.h qemu-0.10.4-instrcount/instrumentation.h
--- qemu-0.10.4/instrumentation.h	1970-01-01 02:00:00.000000000 +0200
+++ qemu-0.10.4-instrcount/instrumentation.h	2009-06-15 16:12:52.453488382 +0300
@@ -0,0 +1,47 @@
+/*
+ * instrumentation.h
+ *
+ * Author: Timo Toyry
+ */
+
+#ifndef INSTRUMENTATION_H
+#define INSTRUMENTATION_H
+
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+
+/*
+ * 0 to disable (default)
+ * nonzero to enable
+ */
+extern unsigned int instrumentation_count_instructions;
+
+/*
+ * 0 output counters to stderr (default)
+ * nonzero output counters to qemulog-file
+ */
+extern unsigned int instrumentation_count_instructions_log;
+
+/*
+extern TCGv_ptr cpu_env;
+extern TCGv_i32 new_tmp(void);
+extern void dead_tmp(TCGv tmp);
+
+#define ARM_INSTRUCTION_COUNTER_OFFSET offsetof(CPUState, arm_instr_count)
+#define ARM_VFP_INSTRUCTION_COUNTER_OFFSET offsetof(CPUState, arm_vfp_instr_count)
+#define ARM_THUMB_INSTRUCTION_COUNTER_OFFSET offsetof(CPUState, arm_thumb_instr_count)
+
+static inline void instr_count_inc_init(uint32_t offset,  int instr);
+static inline void instr_count_inc(int instr);
+
+typedef struct instr_counter_offsets {
+    uint32_t cpustate_offset;
+    TCGArg *tcg_offset[2];
+} instr_counter_offsets;
+*/
+
+
+#endif /* INSTRUMENTATION_H */
diff -Nurp qemu-0.10.4/linux-user/main.c qemu-0.10.4-instrcount/linux-user/main.c
--- qemu-0.10.4/linux-user/main.c	2009-05-12 17:56:03.000000000 +0300
+++ qemu-0.10.4-instrcount/linux-user/main.c	2009-06-15 16:12:52.453488382 +0300
@@ -35,6 +35,8 @@
 
 #include "envlist.h"
 
+#include "instrumentation.h"
+
 #define DEBUG_LOGFILE "/tmp/qemu.log"
 
 char *exec_path;
@@ -2199,6 +2201,12 @@ static void usage(void)
            "-p pagesize  set the host page size to 'pagesize'\n"
            "-strace      log system calls\n"
            "\n"
+#ifdef TARGET_ARM
+           "Other:\n"
+           "-instrcount             Count instructions\n"
+           "-instrcountlog=qemu_log Output counters to qemu-log file\n"
+           "\n"
+#endif
            "Environment variables:\n"
            "QEMU_STRACE       Print system calls and arguments similar to the\n"
            "                  'strace' program.  Enable by setting to any value.\n"
@@ -2341,8 +2349,11 @@ int main(int argc, char **argv, char **e
             (void) envlist_unsetenv(envlist, "LD_PRELOAD");
         } else if (!strcmp(r, "strace")) {
             do_strace = 1;
-        } else
-        {
+        } else if (!strcmp(r, "instrcount")) {
+            instrumentation_count_instructions = 1;
+        } else if (!strcmp(r, "instrcountlog=qemu_log")) {
+            instrumentation_count_instructions_log = 1;
+        } else {
             usage();
         }
     }
diff -Nurp qemu-0.10.4/linux-user/syscall.c qemu-0.10.4-instrcount/linux-user/syscall.c
--- qemu-0.10.4/linux-user/syscall.c	2009-05-12 17:56:03.000000000 +0300
+++ qemu-0.10.4-instrcount/linux-user/syscall.c	2009-06-15 16:12:52.453488382 +0300
@@ -79,6 +79,9 @@
 #include "qemu.h"
 #include "qemu-common.h"
 
+#include "qemu-log.h"
+#include "instrumentation.h"
+
 #if defined(USE_NPTL)
 #include <linux/futex.h>
 #define CLONE_NPTL_FLAGS2 (CLONE_SETTLS | \
@@ -300,6 +303,67 @@ extern int setfsuid(int);
 extern int setfsgid(int);
 extern int setgroups(int, gid_t *);
 
+
+inline static void print_counters(const char *buf)
+{
+    if (!instrumentation_count_instructions_log) {
+        fprintf(stderr, "%s", buf);
+    } else {
+        qemu_log("%s", buf);
+    }
+}
+
+/* Print every nonzero counter as "name: count", then the sum of all slots
+ * except the last two, which callers reserve for the not_instrumented and
+ * total_count entries.  Counters are unsigned, hence PRIu32 rather than %d. */
+inline static void print_instruction_counters_helper(
+    const char **instr_names, int no_instructions, const uint32_t *counters)
+{
+    char buf[256];
+    uint32_t counted = 0;
+    int i1;
+    for (i1 = 0; i1 < no_instructions; i1++) {
+        if (counters[i1] == 0) continue;
+        snprintf(buf, sizeof(buf), "%s: %" PRIu32 "\n", instr_names[i1],
+                 counters[i1]);
+        print_counters(buf);
+        if (i1 < no_instructions - 2) counted += counters[i1];
+    }
+    snprintf(buf, sizeof(buf), "Counted instructions: %" PRIu32 "\n", counted);
+    print_counters(buf);
+}
+
+/* Dump the ARM, VFP and Thumb instruction counters held in env through
+ * print_counters().  Returns immediately unless counting was enabled
+ * (instrumentation_count_instructions is nonzero). */
+inline static void print_instruction_counters(CPUState *env)
+{
+    /* Checked on every target: with the test inside the TARGET_ARM branch,
+     * other targets printed the "not supported" notice on each exit. */
+    if (!instrumentation_count_instructions) return;
+#ifdef TARGET_ARM
+    if (instrumentation_count_instructions_log && !logfile) {
+        fprintf(stderr, "Setting log file to \"qemu_instr_count.log\"\n");
+        cpu_set_log_filename("qemu_instr_count.log");
+        cpu_set_log(1);
+    }
+
+    print_counters("Arm instructions:\n");
+    print_instruction_counters_helper(arm_instr_names, ARM_INSTRUCTIONS,
+        env->arm_instr_count);
+
+    print_counters("VFP instructions:\n");
+    print_instruction_counters_helper(arm_vfp_instr_names,
+        ARM_VFP_INSTRUCTIONS, env->arm_vfp_instr_count);
+
+    print_counters("Thumb instructions:\n");
+    print_instruction_counters_helper(arm_thumb_instr_names,
+        ARM_THUMB_INSTRUCTIONS, env->arm_thumb_instr_count);
+#else
+    print_counters("Instruction counting not supported in this platform.\n");
+#endif
+}
+
 #define ERRNO_TABLE_SIZE 1200
 
 /* target_to_host_errno_table[] is initialized from
@@ -3431,6 +3495,7 @@ abi_long do_syscall(void *cpu_env, int n
 #ifdef HAVE_GPROF
         _mcleanup();
 #endif
+        print_instruction_counters(cpu_env);
         gdb_exit(cpu_env, arg1);
         /* XXX: should free thread stack and CPU env */
         sys_exit(arg1);
@@ -4923,6 +4988,7 @@ abi_long do_syscall(void *cpu_env, int n
 #ifdef HAVE_GPROF
         _mcleanup();
 #endif
+        print_instruction_counters(cpu_env);
         gdb_exit(cpu_env, arg1);
         ret = get_errno(exit_group(arg1));
         break;
diff -Nurp qemu-0.10.4/target-arm/cpu.h qemu-0.10.4-instrcount/target-arm/cpu.h
--- qemu-0.10.4/target-arm/cpu.h	2009-05-12 17:56:33.000000000 +0300
+++ qemu-0.10.4-instrcount/target-arm/cpu.h	2009-06-15 16:12:52.453488382 +0300
@@ -51,6 +51,340 @@
 #define ARMV7M_EXCP_PENDSV  14
 #define ARMV7M_EXCP_SYSTICK 15
 
+/* Do not change the order of the instructions in the blocks marked
+ * by - | -. */
+enum arm_instructions {
+    ARM_INSTRUCTION_B,
+    ARM_INSTRUCTION_BL,
+    ARM_INSTRUCTION_BLX,
+    ARM_INSTRUCTION_BX,
+    ARM_INSTRUCTION_BXJ,
+    ARM_INSTRUCTION_ADC,
+    ARM_INSTRUCTION_ADD,
+    ARM_INSTRUCTION_AND,
+    ARM_INSTRUCTION_BIC,
+    ARM_INSTRUCTION_CMN,
+    ARM_INSTRUCTION_CMP,
+    ARM_INSTRUCTION_EOR,
+    ARM_INSTRUCTION_MOV,
+    ARM_INSTRUCTION_MVN,
+    ARM_INSTRUCTION_ORR,
+    ARM_INSTRUCTION_RSB,
+    ARM_INSTRUCTION_RSC,
+    ARM_INSTRUCTION_SBC,
+    ARM_INSTRUCTION_SUB,
+    ARM_INSTRUCTION_TEQ,
+    ARM_INSTRUCTION_TST,
+    ARM_INSTRUCTION_MUL,  /* - */
+    ARM_INSTRUCTION_MULS, /* - */
+    ARM_INSTRUCTION_MLA,  /* - */
+    ARM_INSTRUCTION_MLAS, /* - */
+    ARM_INSTRUCTION_SMLAXY,
+    ARM_INSTRUCTION_SMLAL,  /* - */
+    ARM_INSTRUCTION_SMLALS, /* - */
+    ARM_INSTRUCTION_SMLALXY,
+    ARM_INSTRUCTION_SMLAWY,
+    ARM_INSTRUCTION_SMUAD, /* - */
+    ARM_INSTRUCTION_SMUSD, /* | */
+    ARM_INSTRUCTION_SMLAD, /* | */
+    ARM_INSTRUCTION_SMLSD, /* - */
+    ARM_INSTRUCTION_SMLALD, /* - */
+    ARM_INSTRUCTION_SMLSLD, /* - */
+    ARM_INSTRUCTION_SMMLA,
+    ARM_INSTRUCTION_SMMLS,
+    ARM_INSTRUCTION_SMMUL,
+    ARM_INSTRUCTION_SMULXY,
+    ARM_INSTRUCTION_SMULL,  /* - */
+    ARM_INSTRUCTION_SMULLS, /* - */
+    ARM_INSTRUCTION_SMULWY,
+    ARM_INSTRUCTION_UMAAL,
+    ARM_INSTRUCTION_UMLAL,  /* - */
+    ARM_INSTRUCTION_UMLALS, /* - */
+    ARM_INSTRUCTION_UMULL,  /* - */
+    ARM_INSTRUCTION_UMULLS, /* - */
+    ARM_INSTRUCTION_QADD,
+    ARM_INSTRUCTION_QDADD,
+    ARM_INSTRUCTION_QADD16,   /* - */
+    ARM_INSTRUCTION_QADDSUBX, /* | */
+    ARM_INSTRUCTION_QSUBADDX, /* | */
+    ARM_INSTRUCTION_QSUB16,   /* | */
+    ARM_INSTRUCTION_QADD8,    /* | */
+    ARM_INSTRUCTION_QSUB8,    /* - */
+    ARM_INSTRUCTION_QSUB,
+    ARM_INSTRUCTION_QDSUB,
+    ARM_INSTRUCTION_SADD16,   /* - */
+    ARM_INSTRUCTION_SADDSUBX, /* | */
+    ARM_INSTRUCTION_SSUBADDX, /* | */
+    ARM_INSTRUCTION_SSUB16,   /* | */
+    ARM_INSTRUCTION_SADD8,    /* | */
+    ARM_INSTRUCTION_SSUB8,    /* - */
+    ARM_INSTRUCTION_SHADD16,   /* - */
+    ARM_INSTRUCTION_SHADDSUBX, /* | */
+    ARM_INSTRUCTION_SHSUBADDX, /* | */
+    ARM_INSTRUCTION_SHSUB16,   /* | */
+    ARM_INSTRUCTION_SHADD8,    /* | */
+    ARM_INSTRUCTION_SHSUB8,    /* - */
+    ARM_INSTRUCTION_UADD16,   /* - */
+    ARM_INSTRUCTION_UADDSUBX, /* | */
+    ARM_INSTRUCTION_USUBADDX, /* | */
+    ARM_INSTRUCTION_USUB16,   /* | */
+    ARM_INSTRUCTION_UADD8,    /* | */
+    ARM_INSTRUCTION_USUB8,    /* - */
+    ARM_INSTRUCTION_UHADD16,   /* - */
+    ARM_INSTRUCTION_UHADDSUBX, /* | */
+    ARM_INSTRUCTION_UHSUBADDX, /* | */
+    ARM_INSTRUCTION_UHSUB16,   /* | */
+    ARM_INSTRUCTION_UHADD8,    /* | */
+    ARM_INSTRUCTION_UHSUB8,    /* - */
+    ARM_INSTRUCTION_UQADD16,   /* - */
+    ARM_INSTRUCTION_UQADDSUBX, /* | */
+    ARM_INSTRUCTION_UQSUBADDX, /* | */
+    ARM_INSTRUCTION_UQSUB16,   /* | */
+    ARM_INSTRUCTION_UQADD8,    /* | */
+    ARM_INSTRUCTION_UQSUB8,    /* - */
+    ARM_INSTRUCTION_SXTAB16, /* - */
+    ARM_INSTRUCTION_SXTAB,   /* | */
+    ARM_INSTRUCTION_SXTAH,   /* | */
+    ARM_INSTRUCTION_SXTB16,  /* | */
+    ARM_INSTRUCTION_SXTB,    /* | */
+    ARM_INSTRUCTION_SXTH,    /* - */
+    ARM_INSTRUCTION_UXTAB16, /* - */
+    ARM_INSTRUCTION_UXTAB,   /* | */
+    ARM_INSTRUCTION_UXTAH,   /* | */
+    ARM_INSTRUCTION_UXTB16,  /* | */
+    ARM_INSTRUCTION_UXTB,    /* | */
+    ARM_INSTRUCTION_UXTH,    /* - */
+    ARM_INSTRUCTION_CLZ,
+    ARM_INSTRUCTION_USAD8,
+    ARM_INSTRUCTION_USADA8,
+    ARM_INSTRUCTION_PKH,
+    ARM_INSTRUCTION_PKHBT,
+    ARM_INSTRUCTION_PKHTB,
+    ARM_INSTRUCTION_REV,
+    ARM_INSTRUCTION_REV16,
+    ARM_INSTRUCTION_REVSH,
+    ARM_INSTRUCTION_SEL,
+    ARM_INSTRUCTION_SSAT,
+    ARM_INSTRUCTION_SSAT16,
+    ARM_INSTRUCTION_USAT,
+    ARM_INSTRUCTION_USAT16,
+    ARM_INSTRUCTION_MRS,
+    ARM_INSTRUCTION_MSR,
+    ARM_INSTRUCTION_CPS,
+    ARM_INSTRUCTION_SETEND,
+    ARM_INSTRUCTION_LDR,
+    ARM_INSTRUCTION_LDRB,
+    ARM_INSTRUCTION_LDRBT,
+    ARM_INSTRUCTION_LDRD,
+    ARM_INSTRUCTION_LDREX,
+    ARM_INSTRUCTION_LDRH,
+    ARM_INSTRUCTION_LDRSB,
+    ARM_INSTRUCTION_LDRSH,
+    ARM_INSTRUCTION_LDRT,
+    ARM_INSTRUCTION_STR,
+    ARM_INSTRUCTION_STRB,
+    ARM_INSTRUCTION_STRBT,
+    ARM_INSTRUCTION_STRD,
+    ARM_INSTRUCTION_STREX,
+    ARM_INSTRUCTION_STRH,
+    ARM_INSTRUCTION_STRT,
+    ARM_INSTRUCTION_LDM1, //See Arm manual ARM DDI 0100I page A3-27
+    ARM_INSTRUCTION_LDM2,
+    ARM_INSTRUCTION_LDM3,
+    ARM_INSTRUCTION_STM1,
+    ARM_INSTRUCTION_STM2,
+    ARM_INSTRUCTION_SWP,
+    ARM_INSTRUCTION_SWPB,
+    ARM_INSTRUCTION_BKPT,
+    ARM_INSTRUCTION_SWI,
+    ARM_INSTRUCTION_CDP,
+    ARM_INSTRUCTION_LDC,
+    ARM_INSTRUCTION_MCR,
+    ARM_INSTRUCTION_MCRR,
+    ARM_INSTRUCTION_MRC,
+    ARM_INSTRUCTION_MRRC,
+    ARM_INSTRUCTION_STC,
+    ARM_INSTRUCTION_PLD,
+    ARM_INSTRUCTION_RFE,
+    ARM_INSTRUCTION_SRS,
+    ARM_INSTRUCTION_MCRR2,
+    ARM_INSTRUCTION_MRRC2,
+    ARM_INSTRUCTION_STC2,
+    ARM_INSTRUCTION_LDC2,
+    ARM_INSTRUCTION_CDP2,
+    ARM_INSTRUCTION_MCR2,
+    ARM_INSTRUCTION_MRC2,
+    ARM_INSTRUCTION_COPROCESSOR,
+    ARM_INSTRUCTION_UNKNOWN,
+    ARM_INSTRUCTION_NOT_INSTRUMENTED,
+    ARM_INSTRUCTION_TOTAL_COUNT,
+    ARM_INSTRUCTIONS
+};
+
+/* Do not change the order of the instructions in the blocks marked
+ * by - | -. */
+enum arm_vfp_instructions {
+    ARM_VFP_INSTRUCTION_FABSD, /* - */
+    ARM_VFP_INSTRUCTION_FABSS, /* - */
+    ARM_VFP_INSTRUCTION_FADDD, /* - */
+    ARM_VFP_INSTRUCTION_FADDS, /* - */
+    ARM_VFP_INSTRUCTION_FCMPD, /* - */
+    ARM_VFP_INSTRUCTION_FCMPS, /* - */
+    ARM_VFP_INSTRUCTION_FCMPED,  /* - */
+    ARM_VFP_INSTRUCTION_FCMPES,  /* - */
+    ARM_VFP_INSTRUCTION_FCMPEZD, /* - */
+    ARM_VFP_INSTRUCTION_FCMPEZS, /* - */
+    ARM_VFP_INSTRUCTION_FCMPZD, /* - */
+    ARM_VFP_INSTRUCTION_FCMPZS, /* - */
+    ARM_VFP_INSTRUCTION_FCPYD, /* - */
+    ARM_VFP_INSTRUCTION_FCPYS, /* - */
+    ARM_VFP_INSTRUCTION_FCVTDS, /* - */
+    ARM_VFP_INSTRUCTION_FCVTSD, /* - */
+    ARM_VFP_INSTRUCTION_FDIVD, /* - */
+    ARM_VFP_INSTRUCTION_FDIVS, /* - */
+    ARM_VFP_INSTRUCTION_FLDD, /* - */
+    ARM_VFP_INSTRUCTION_FLDS, /* - */
+    ARM_VFP_INSTRUCTION_FLDMD, /* - */
+    ARM_VFP_INSTRUCTION_FLDMS, /* - */
+    ARM_VFP_INSTRUCTION_FLDMX,
+    ARM_VFP_INSTRUCTION_FMACD,
+    ARM_VFP_INSTRUCTION_FMACS,
+    ARM_VFP_INSTRUCTION_FMDHR,
+    ARM_VFP_INSTRUCTION_FMDLR,
+    ARM_VFP_INSTRUCTION_FMDRR,
+    ARM_VFP_INSTRUCTION_FMRDH,
+    ARM_VFP_INSTRUCTION_FMRDL,
+    ARM_VFP_INSTRUCTION_FMRRD, /* - */
+    ARM_VFP_INSTRUCTION_FMRRS, /* - */
+    ARM_VFP_INSTRUCTION_FMRS,
+    ARM_VFP_INSTRUCTION_FMRX,
+    ARM_VFP_INSTRUCTION_FMSCD, /* - */
+    ARM_VFP_INSTRUCTION_FMSCS, /* - */
+    ARM_VFP_INSTRUCTION_FMSR,
+    ARM_VFP_INSTRUCTION_FMSRR,
+    ARM_VFP_INSTRUCTION_FMSTAT,
+    ARM_VFP_INSTRUCTION_FMULD, /* - */
+    ARM_VFP_INSTRUCTION_FMULS, /* - */
+    ARM_VFP_INSTRUCTION_FMXR,
+    ARM_VFP_INSTRUCTION_FNEGD, /* - */
+    ARM_VFP_INSTRUCTION_FNEGS, /* - */
+    ARM_VFP_INSTRUCTION_FNMACD, /* - */
+    ARM_VFP_INSTRUCTION_FNMACS, /* - */
+    ARM_VFP_INSTRUCTION_FNMSCD, /* - */
+    ARM_VFP_INSTRUCTION_FNMSCS, /* - */
+    ARM_VFP_INSTRUCTION_FNMULD, /* - */
+    ARM_VFP_INSTRUCTION_FNMULS, /* - */
+    ARM_VFP_INSTRUCTION_FSITOD, /* - */
+    ARM_VFP_INSTRUCTION_FSITOS, /* - */
+    ARM_VFP_INSTRUCTION_FSQRTD, /* - */
+    ARM_VFP_INSTRUCTION_FSQRTS, /* - */
+    ARM_VFP_INSTRUCTION_FSTD, /* - */
+    ARM_VFP_INSTRUCTION_FSTS, /* - */
+    ARM_VFP_INSTRUCTION_FSTMD,
+    ARM_VFP_INSTRUCTION_FSTMS,
+    ARM_VFP_INSTRUCTION_FSTMX,
+    ARM_VFP_INSTRUCTION_FSUBD, /* - */
+    ARM_VFP_INSTRUCTION_FSUBS, /* - */
+    ARM_VFP_INSTRUCTION_FTOSID, /* - */
+    ARM_VFP_INSTRUCTION_FTOSIS, /* - */
+    ARM_VFP_INSTRUCTION_FTOSIZD, /* - */
+    ARM_VFP_INSTRUCTION_FTOSIZS, /* - */
+    ARM_VFP_INSTRUCTION_FTOUID, /* - */
+    ARM_VFP_INSTRUCTION_FTOUIS, /* - */
+    ARM_VFP_INSTRUCTION_FTOUIZD, /* - */
+    ARM_VFP_INSTRUCTION_FTOUIZS, /* - */
+    ARM_VFP_INSTRUCTION_FUITOD, /* - */
+    ARM_VFP_INSTRUCTION_FUITOS, /* - */
+    ARM_VFP_INSTRUCTION_UNKNOWN,
+    ARM_VFP_INSTRUCTION_NOT_INSTRUMENTED,
+    ARM_VFP_INSTRUCTION_TOTAL_COUNT,
+    ARM_VFP_INSTRUCTIONS
+};
+
+/* Do not change the order of the instructions in the blocks marked
+ * by - | -. */
+enum arm_thumb_instructions {
+    ARM_THUMB_INSTRUCTION_ADC,
+    ARM_THUMB_INSTRUCTION_ADD1, /* - */
+    ARM_THUMB_INSTRUCTION_ADD2, /* | */
+    ARM_THUMB_INSTRUCTION_ADD3, /* - */
+    ARM_THUMB_INSTRUCTION_ADD4,
+    ARM_THUMB_INSTRUCTION_ADD5,
+    ARM_THUMB_INSTRUCTION_ADD6,
+    ARM_THUMB_INSTRUCTION_ADD7,
+    ARM_THUMB_INSTRUCTION_AND,
+    ARM_THUMB_INSTRUCTION_ASR1,
+    ARM_THUMB_INSTRUCTION_ASR2,
+    ARM_THUMB_INSTRUCTION_B1,
+    ARM_THUMB_INSTRUCTION_B2,
+    ARM_THUMB_INSTRUCTION_BIC,
+    ARM_THUMB_INSTRUCTION_BKPT,
+    ARM_THUMB_INSTRUCTION_BL,
+    ARM_THUMB_INSTRUCTION_BLX1,
+    ARM_THUMB_INSTRUCTION_BLX2,
+    ARM_THUMB_INSTRUCTION_BL_BLX_HIGH_PART,
+    ARM_THUMB_INSTRUCTION_BX,
+    ARM_THUMB_INSTRUCTION_CMN,
+    ARM_THUMB_INSTRUCTION_CMP1,
+    ARM_THUMB_INSTRUCTION_CMP2,
+    ARM_THUMB_INSTRUCTION_CMP3,
+    ARM_THUMB_INSTRUCTION_CPS,
+    ARM_THUMB_INSTRUCTION_CPY,
+    ARM_THUMB_INSTRUCTION_EOR,
+    ARM_THUMB_INSTRUCTION_LDMIA,
+    ARM_THUMB_INSTRUCTION_LDR1,
+    ARM_THUMB_INSTRUCTION_LDR2,
+    ARM_THUMB_INSTRUCTION_LDR3,
+    ARM_THUMB_INSTRUCTION_LDR4,
+    ARM_THUMB_INSTRUCTION_LDRB1,
+    ARM_THUMB_INSTRUCTION_LDRB2,
+    ARM_THUMB_INSTRUCTION_LDRH1,
+    ARM_THUMB_INSTRUCTION_LDRH2,
+    ARM_THUMB_INSTRUCTION_LDRSB,
+    ARM_THUMB_INSTRUCTION_LDRSH,
+    ARM_THUMB_INSTRUCTION_LSL1,
+    ARM_THUMB_INSTRUCTION_LSL2,
+    ARM_THUMB_INSTRUCTION_LSR1,
+    ARM_THUMB_INSTRUCTION_LSR2,
+    ARM_THUMB_INSTRUCTION_MOV1,
+    ARM_THUMB_INSTRUCTION_MOV2,
+    ARM_THUMB_INSTRUCTION_MOV3,
+    ARM_THUMB_INSTRUCTION_MUL,
+    ARM_THUMB_INSTRUCTION_MVN,
+    ARM_THUMB_INSTRUCTION_NEG,
+    ARM_THUMB_INSTRUCTION_ORR,
+    ARM_THUMB_INSTRUCTION_POP,
+    ARM_THUMB_INSTRUCTION_PUSH,
+    ARM_THUMB_INSTRUCTION_REV,
+    ARM_THUMB_INSTRUCTION_REV16,
+    ARM_THUMB_INSTRUCTION_REVSH,
+    ARM_THUMB_INSTRUCTION_ROR,
+    ARM_THUMB_INSTRUCTION_SBC,
+    ARM_THUMB_INSTRUCTION_SETEND,
+    ARM_THUMB_INSTRUCTION_STMIA,
+    ARM_THUMB_INSTRUCTION_STR1,
+    ARM_THUMB_INSTRUCTION_STR2,
+    ARM_THUMB_INSTRUCTION_STR3,
+    ARM_THUMB_INSTRUCTION_STRB1,
+    ARM_THUMB_INSTRUCTION_STRB2,
+    ARM_THUMB_INSTRUCTION_STRH1,
+    ARM_THUMB_INSTRUCTION_STRH2,
+    ARM_THUMB_INSTRUCTION_SUB1, /* - */
+    ARM_THUMB_INSTRUCTION_SUB2, /* | */
+    ARM_THUMB_INSTRUCTION_SUB3, /* - */
+    ARM_THUMB_INSTRUCTION_SUB4,
+    ARM_THUMB_INSTRUCTION_SWI,
+    ARM_THUMB_INSTRUCTION_SXTB,
+    ARM_THUMB_INSTRUCTION_SXTH,
+    ARM_THUMB_INSTRUCTION_TST,
+    ARM_THUMB_INSTRUCTION_UXTB,
+    ARM_THUMB_INSTRUCTION_UXTH,
+    ARM_THUMB_INSTRUCTION_UNKNOWN,
+    ARM_THUMB_INSTRUCTION_NOT_INSTRUMENTED,
+    ARM_THUMB_INSTRUCTION_TOTAL_COUNT,
+    ARM_THUMB_INSTRUCTIONS
+};
+
 typedef void ARMWriteCPFunc(void *opaque, int cp_info,
                             int srcreg, int operand, uint32_t value);
 typedef uint32_t ARMReadCPFunc(void *opaque, int cp_info,
@@ -202,6 +536,12 @@ typedef struct CPUARMState {
 
     /* These fields after the common ones so they are preserved on reset.  */
     struct arm_boot_info *boot_info;
+
+    /* Instruction counting "regs". */
+    uint32_t arm_instr_count[ARM_INSTRUCTIONS];
+    uint32_t arm_vfp_instr_count[ARM_VFP_INSTRUCTIONS];
+    uint32_t arm_thumb_instr_count[ARM_THUMB_INSTRUCTIONS];
+
 } CPUARMState;
 
 CPUARMState *cpu_arm_init(const char *cpu_model);
@@ -446,4 +786,8 @@ static inline void cpu_get_tb_cpu_state(
         *flags |= (1 << 7);
 }
 
+extern const char const *arm_instr_names[];
+extern const char const *arm_vfp_instr_names[];
+extern const char const *arm_thumb_instr_names[];
+
 #endif
diff -Nurp qemu-0.10.4/target-arm/translate.c qemu-0.10.4-instrcount/target-arm/translate.c
--- qemu-0.10.4/target-arm/translate.c	2009-05-12 17:56:33.000000000 +0300
+++ qemu-0.10.4-instrcount/target-arm/translate.c	2009-06-15 16:12:52.481605127 +0300
@@ -31,6 +31,8 @@
 #include "tcg-op.h"
 #include "qemu-log.h"
 
+#include "instrumentation.h"
+
 #include "helpers.h"
 #define GEN_HELPER 1
 #include "helpers.h"
@@ -235,6 +237,365 @@ static void store_reg(DisasContext *s, i
 /* Set NZCV flags from the high 4 bits of var.  */
 #define gen_set_nzcv(var) gen_set_cpsr(var, CPSR_NZCV)
 
+/* string names for arm_instruction enum values */
+const char const *arm_instr_names[] = {
+        "b",
+        "bl",
+        "blx",
+        "bx",
+        "bxj",
+        "adc",
+        "add",
+        "and",
+        "bic",
+        "cmn",
+        "cmp",
+        "eor",
+        "mov",
+        "mvn",
+        "orr",
+        "rsb",
+        "rsc",
+        "sbc",
+        "sub",
+        "teq",
+        "tst",
+        "mul",
+        "muls",
+        "mla",
+        "mlas",
+        "smla<x><y>",
+        "smlal",
+        "smlals",
+        "smlal<x><y>",
+        "smlaw<y>",
+        "smuad",
+        "smusd",
+        "smlad",
+        "smlsd",
+        "smlald",
+        "smlsld",
+        "smmla",
+        "smmls",
+        "smmul",
+        "smul<x><y>",
+        "smull",
+        "smulls",
+        "smulw<y>",
+        "umaal",
+        "umlal",
+        "umlals",
+        "umull",
+        "umulls",
+        "qadd",
+        "qdadd",
+        "qadd16",
+        "qaddsubx",
+        "qsubaddx",
+        "qsub16",
+        "qadd8",
+        "qsub8",
+        "qsub",
+        "qdsub",
+        "sadd16",
+        "saddsubx",
+        "ssubaddx",
+        "ssub16",
+        "sadd8",
+        "ssub8",
+        "shadd16",
+        "shaddsubx",
+        "shsubaddx",
+        "shsub16",
+        "shadd8",
+        "shsub8",
+        "uadd16",
+        "uaddsubx",
+        "usubaddx",
+        "usub16",
+        "uadd8",
+        "usub8",
+        "uhadd16",
+        "uhaddsubx",
+        "uhsubaddx",
+        "uhsub16",
+        "uhadd8",
+        "uhsub8",
+        "uqadd16",
+        "uqaddsubx",
+        "uqsubaddx",
+        "uqsub16",
+        "uqadd8",
+        "uqsub8",
+        "sxtab16",
+        "sxtab",
+        "sxtah",
+        "sxtb16",
+        "sxtb",
+        "sxth",
+        "uxtab16",
+        "uxtab",
+        "uxtah",
+        "uxtb16",
+        "uxtb",
+        "uxth",
+        "clz",
+        "usad8",
+        "usada8",
+        "pkh",
+        "pkhbt",
+        "pkhtb",
+        "rev",
+        "rev16",
+        "revsh",
+        "sel",
+        "ssat",
+        "ssat16",
+        "usat",
+        "usat16",
+        "mrs",
+        "msr",
+        "cps",
+        "setend",
+        "ldr",
+        "ldrb",
+        "ldrbt",
+        "ldrd",
+        "ldrex",
+        "ldrh",
+        "ldrsb",
+        "ldrsh",
+        "ldrt",
+        "str",
+        "strb",
+        "strbt",
+        "strd",
+        "strex",
+        "strh",
+        "strt",
+        "ldm1", //see arm manual ARM DDI 0100I page A3-27
+        "ldm2",
+        "ldm3",
+        "stm1",
+        "stm2",
+        "swp",
+        "swpb",
+        "bkpt",
+        "swi",
+        "cdp",
+        "ldc",
+        "mcr",
+        "mcrr",
+        "mrc",
+        "mrrc",
+        "stc",
+        "pld",
+        "rfe",
+        "srs",
+        "mcrr2",
+        "mrrc2",
+        "stc2",
+        "ldc2",
+        "cdp2",
+        "mcr2",
+        "mrc2",
+        "coprocessor",
+        "unknown",
+        "not_instrumented",
+        "total_instructions"
+};
+
+const char const *arm_vfp_instr_names[] = { /* string names for arm_vfp_instruction enum values */
+        "fabsd",
+        "fabss",
+        "faddd",
+        "fadds",
+        "fcmpd",
+        "fcmps",
+        "fcmped",
+        "fcmpes",
+        "fcmpezd",
+        "fcmpezs",
+        "fcmpzd",
+        "fcmpzs",
+        "fcpyd",
+        "fcpys",
+        "fcvtds",
+        "fcvtsd",
+        "fdivd",
+        "fdivs",
+        "fldd",
+        "flds",
+        "fldmd",
+        "fldms",
+        "fldmx",
+        "fmacd",
+        "fmacs",
+        "fmdhr",
+        "fmdlr",
+        "fmdrr",
+        "fmrdh",
+        "fmrdl",
+        "fmrrd",
+        "fmrrs",
+        "fmrs",
+        "fmrx",
+        "fmscd",
+        "fmscs",
+        "fmsr",
+        "fmsrr",
+        "fmstat",
+        "fmuld",
+        "fmuls",
+        "fmxr",
+        "fnegd",
+        "fnegs",
+        "fnmacd",
+        "fnmacs",
+        "fnmscd",
+        "fnmscs",
+        "fnmuld",
+        "fnmuls",
+        "fsitod",
+        "fsitos",
+        "fsqrtd",
+        "fsqrts",
+        "fstd",
+        "fsts",
+        "fstmd",
+        "fstms",
+        "fstmx",
+        "fsubd",
+        "fsubs",
+        "ftosid",
+        "ftosis",
+        "ftosizd",
+        "ftosizs",
+        "ftouid",
+        "ftouis",
+        "ftouizd",
+        "ftouizs",
+        "fuitod",
+        "fuitos",
+        "unknown",
+        "not_instrumented",
+        "total_count"
+};
+
+/* string names for arm_thumb_instruction enum values */
+const char const *arm_thumb_instr_names[] = {
+        "adc",
+        "add1",
+        "add2",
+        "add3",
+        "add4",
+        "add5",
+        "add6",
+        "add7",
+        "and",
+        "asr1",
+        "asr2",
+        "b1",
+        "b2",
+        "bic",
+        "bkpt",
+        "bl",
+        "blx1",
+        "blx2",
+        "bl-blx_high",
+        "bx",
+        "cmn",
+        "cmp1",
+        "cmp2",
+        "cmp3",
+        "cps",
+        "cpy",
+        "eor",
+        "ldmia",
+        "ldr1",
+        "ldr2",
+        "ldr3",
+        "ldr4",
+        "ldrb1",
+        "ldrb2",
+        "ldrh1",
+        "ldrh2",
+        "ldrsb",
+        "ldrsh",
+        "lsl1",
+        "lsl2",
+        "lsr1",
+        "lsr2",
+        "mov1",
+        "mov2",
+        "mov3",
+        "mul",
+        "mvn",
+        "neg",
+        "orr",
+        "pop",
+        "push",
+        "rev",
+        "rev16",
+        "revsh",
+        "ror",
+        "sbc",
+        "setend",
+        "stmia",
+        "str1",
+        "str2",
+        "str3",
+        "strb1",
+        "strb2",
+        "strh1",
+        "strh2",
+        "sub1",
+        "sub2",
+        "sub3",
+        "sub4",
+        "swi",
+        "sxtb",
+        "sxth",
+        "tst",
+        "uxtb",
+        "uxth",
+        "unknown",
+        "not_instrumented",
+        "total_count",
+};
+
+#define ARM_INSTRUCTION_COUNTER_OFFSET offsetof(CPUState, arm_instr_count)
+#define ARM_VFP_INSTRUCTION_COUNTER_OFFSET offsetof(CPUState, arm_vfp_instr_count)
+#define ARM_THUMB_INSTRUCTION_COUNTER_OFFSET offsetof(CPUState, arm_thumb_instr_count)
+
+typedef struct instr_counter_offsets {
+    uint32_t cpustate_offset;
+    TCGArg *tcg_offset[2];
+} instr_counter_offsets;
+
+static instr_counter_offsets instr_offsets;
+
+static inline void instr_count_inc_init(uint32_t offset,  int instr)
+{
+    if (!instrumentation_count_instructions) return;
+    instr_offsets.cpustate_offset = offset;
+    TCGv tmp = new_tmp();
+    tcg_gen_ld_i32(tmp, cpu_env, instr_offsets.cpustate_offset + sizeof(uint32_t) * instr);
+    instr_offsets.tcg_offset[0] = gen_opparam_ptr - 1;
+    tcg_gen_addi_i32(tmp, tmp, 1);
+    tcg_gen_st_i32(tmp, cpu_env, instr_offsets.cpustate_offset + sizeof(uint32_t) * instr);
+    instr_offsets.tcg_offset[1] = gen_opparam_ptr - 1;
+    dead_tmp(tmp);
+}
+
+/* Retarget the load/store emitted by instr_count_inc_init to counter instr */
+static inline void instr_count_inc(int instr)
+{
+    if (!instrumentation_count_instructions) return;
+    *(instr_offsets.tcg_offset[0]) = instr_offsets.cpustate_offset + sizeof(uint32_t) * instr;
+    *(instr_offsets.tcg_offset[1]) = instr_offsets.cpustate_offset + sizeof(uint32_t) * instr;
+}
+
 static void gen_exception(int excp)
 {
     TCGv tmp = new_tmp();
@@ -580,6 +941,7 @@ static inline void gen_arm_shift_reg(TCG
 static void gen_arm_parallel_addsub(int op1, int op2, TCGv a, TCGv b)
 {
     TCGv_ptr tmp;
+    unsigned int instr_index = 0;
 
     switch (op1) {
 #define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp)
@@ -587,28 +949,37 @@ static void gen_arm_parallel_addsub(int 
         tmp = tcg_temp_new_ptr();
         tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE));
         PAS_OP(s)
+        instr_index = ARM_INSTRUCTION_SADD16;
         break;
     case 5:
         tmp = tcg_temp_new_ptr();
         tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE));
         PAS_OP(u)
+        instr_index = ARM_INSTRUCTION_UADD16;
         break;
 #undef gen_pas_helper
 #define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b)
     case 2:
         PAS_OP(q);
+        instr_index = ARM_INSTRUCTION_QADD16;
         break;
     case 3:
         PAS_OP(sh);
+        instr_index = ARM_INSTRUCTION_SHADD16;
         break;
     case 6:
         PAS_OP(uq);
+        instr_index = ARM_INSTRUCTION_UQADD16;
         break;
     case 7:
         PAS_OP(uh);
+        instr_index = ARM_INSTRUCTION_UHADD16;
         break;
 #undef gen_pas_helper
     }
+    if (op2 == 7) instr_index += 5;
+    else instr_index += op2;
+    instr_count_inc(instr_index);
 }
 #undef PAS_OP
 
@@ -2702,6 +3073,9 @@ static int disas_vfp_insn(CPUState * env
     TCGv tmp;
     TCGv tmp2;
 
+    instr_count_inc_init(ARM_VFP_INSTRUCTION_COUNTER_OFFSET,
+            ARM_VFP_INSTRUCTION_NOT_INSTRUMENTED);
+
     if (!arm_feature(env, ARM_FEATURE_VFP))
         return 1;
 
@@ -2724,6 +3098,8 @@ static int disas_vfp_insn(CPUState * env
                 int size;
                 int pass;
 
+                instr_count_inc(ARM_VFP_INSTRUCTION_UNKNOWN);
+
                 VFP_DREG_N(rn, insn);
                 if (insn & 0xf)
                     return 1;
@@ -2770,6 +3146,11 @@ static int disas_vfp_insn(CPUState * env
                         }
                         break;
                     case 2:
+                        if (pass) {
+                            instr_count_inc(ARM_VFP_INSTRUCTION_FMRDH);
+                        } else {
+                            instr_count_inc(ARM_VFP_INSTRUCTION_FMRDL);
+                        }
                         break;
                     }
                     store_reg(s, rd, tmp);
@@ -2801,6 +3182,11 @@ static int disas_vfp_insn(CPUState * env
                             dead_tmp(tmp2);
                             break;
                         case 2:
+                            if (pass) {
+                                instr_count_inc(ARM_VFP_INSTRUCTION_FMDHR);
+                            } else {
+                                instr_count_inc(ARM_VFP_INSTRUCTION_FMDLR);
+                            }
                             break;
                         }
                         neon_store_reg(rn, pass, tmp);
@@ -2814,6 +3200,7 @@ static int disas_vfp_insn(CPUState * env
                     /* vfp->arm */
                     if (insn & (1 << 21)) {
                         /* system register */
+                        instr_count_inc(ARM_VFP_INSTRUCTION_FMRX);
                         rn >>= 1;
 
                         switch (rn) {
@@ -2841,6 +3228,7 @@ static int disas_vfp_insn(CPUState * env
                             break;
                         case ARM_VFP_FPSCR:
                             if (rd == 15) {
+                                instr_count_inc(ARM_VFP_INSTRUCTION_FMSTAT);
                                 tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
                                 tcg_gen_andi_i32(tmp, tmp, 0xf0000000);
                             } else {
@@ -2859,6 +3247,7 @@ static int disas_vfp_insn(CPUState * env
                             return 1;
                         }
                     } else {
+                        instr_count_inc(ARM_VFP_INSTRUCTION_FMRS);
                         gen_mov_F0_vreg(0, rn);
                         tmp = gen_vfp_mrs();
                     }
@@ -2873,6 +3262,7 @@ static int disas_vfp_insn(CPUState * env
                     /* arm->vfp */
                     tmp = load_reg(s, rd);
                     if (insn & (1 << 21)) {
+                        instr_count_inc(ARM_VFP_INSTRUCTION_FMXR);
                         rn >>= 1;
                         /* system register */
                         switch (rn) {
@@ -2900,6 +3290,7 @@ static int disas_vfp_insn(CPUState * env
                             return 1;
                         }
                     } else {
+                        instr_count_inc(ARM_VFP_INSTRUCTION_FMSR);
                         gen_vfp_msr(tmp);
                         gen_mov_vreg_F0(0, rn);
                     }
@@ -3023,44 +3414,54 @@ static int disas_vfp_insn(CPUState * env
                 /* Perform the calculation.  */
                 switch (op) {
                 case 0: /* mac: fd + (fn * fm) */
+                    instr_count_inc(ARM_VFP_INSTRUCTION_FMACD + (1 - dp));
                     gen_vfp_mul(dp);
                     gen_mov_F1_vreg(dp, rd);
                     gen_vfp_add(dp);
                     break;
                 case 1: /* nmac: fd - (fn * fm) */
+                    instr_count_inc(ARM_VFP_INSTRUCTION_FNMACD + (1 - dp));
                     gen_vfp_mul(dp);
                     gen_vfp_neg(dp);
                     gen_mov_F1_vreg(dp, rd);
                     gen_vfp_add(dp);
                     break;
                 case 2: /* msc: -fd + (fn * fm) */
+                    instr_count_inc(ARM_VFP_INSTRUCTION_FMSCD + (1 - dp));
                     gen_vfp_mul(dp);
                     gen_mov_F1_vreg(dp, rd);
                     gen_vfp_sub(dp);
                     break;
                 case 3: /* nmsc: -fd - (fn * fm)  */
+                    instr_count_inc(ARM_VFP_INSTRUCTION_FNMSCD + (1 - dp));
                     gen_vfp_mul(dp);
                     gen_vfp_neg(dp);
                     gen_mov_F1_vreg(dp, rd);
                     gen_vfp_sub(dp);
                     break;
                 case 4: /* mul: fn * fm */
+                    instr_count_inc(ARM_VFP_INSTRUCTION_FMULD + (1 - dp));
                     gen_vfp_mul(dp);
                     break;
                 case 5: /* nmul: -(fn * fm) */
+                    instr_count_inc(ARM_VFP_INSTRUCTION_FNMULD + (1 - dp));
                     gen_vfp_mul(dp);
                     gen_vfp_neg(dp);
                     break;
                 case 6: /* add: fn + fm */
+                    instr_count_inc(ARM_VFP_INSTRUCTION_FADDD + (1 - dp));
                     gen_vfp_add(dp);
                     break;
                 case 7: /* sub: fn - fm */
+                    instr_count_inc(ARM_VFP_INSTRUCTION_FSUBD + (1 - dp));
                     gen_vfp_sub(dp);
                     break;
                 case 8: /* div: fn / fm */
+                    instr_count_inc(ARM_VFP_INSTRUCTION_FDIVD + (1 - dp));
                     gen_vfp_div(dp);
                     break;
                 case 14: /* fconst */
+                    instr_count_inc(ARM_VFP_INSTRUCTION_UNKNOWN);
                     if (!arm_feature(env, ARM_FEATURE_VFP3))
                       return 1;
 
@@ -3085,90 +3486,116 @@ static int disas_vfp_insn(CPUState * env
                 case 15: /* extension space */
                     switch (rn) {
                     case 0: /* cpy */
+                        instr_count_inc(ARM_VFP_INSTRUCTION_FCPYD + (1 - dp));
                         /* no-op */
                         break;
                     case 1: /* abs */
+                        instr_count_inc(ARM_VFP_INSTRUCTION_FABSD + (1 - dp));
                         gen_vfp_abs(dp);
                         break;
                     case 2: /* neg */
+                        instr_count_inc(ARM_VFP_INSTRUCTION_FNEGD + (1 - dp));
                         gen_vfp_neg(dp);
                         break;
                     case 3: /* sqrt */
+                        instr_count_inc(ARM_VFP_INSTRUCTION_FSQRTD + (1 - dp));
                         gen_vfp_sqrt(dp);
                         break;
                     case 8: /* cmp */
+                        instr_count_inc(ARM_VFP_INSTRUCTION_FCMPD + (1 - dp));
                         gen_vfp_cmp(dp);
                         break;
                     case 9: /* cmpe */
+                        instr_count_inc(ARM_VFP_INSTRUCTION_FCMPED + (1 - dp));
                         gen_vfp_cmpe(dp);
                         break;
                     case 10: /* cmpz */
+                        instr_count_inc(ARM_VFP_INSTRUCTION_FCMPZD + (1 - dp));
                         gen_vfp_cmp(dp);
                         break;
                     case 11: /* cmpez */
+                        instr_count_inc(ARM_VFP_INSTRUCTION_FCMPEZD + (1 - dp));
                         gen_vfp_F1_ld0(dp);
                         gen_vfp_cmpe(dp);
                         break;
                     case 15: /* single<->double conversion */
-                        if (dp)
+                        if (dp) {
+                            instr_count_inc(ARM_VFP_INSTRUCTION_FCVTSD);
                             gen_helper_vfp_fcvtsd(cpu_F0s, cpu_F0d, cpu_env);
-                        else
+                        }
+                        else {
+                            instr_count_inc(ARM_VFP_INSTRUCTION_FCVTDS);
                             gen_helper_vfp_fcvtds(cpu_F0d, cpu_F0s, cpu_env);
+                        }
                         break;
                     case 16: /* fuito */
+                        instr_count_inc(ARM_VFP_INSTRUCTION_FUITOD + (1 - dp));
                         gen_vfp_uito(dp);
                         break;
                     case 17: /* fsito */
+                        instr_count_inc(ARM_VFP_INSTRUCTION_FSITOD + (1 - dp));
                         gen_vfp_sito(dp);
                         break;
                     case 20: /* fshto */
+                        instr_count_inc(ARM_VFP_INSTRUCTION_UNKNOWN);
                         if (!arm_feature(env, ARM_FEATURE_VFP3))
                           return 1;
                         gen_vfp_shto(dp, 16 - rm);
                         break;
                     case 21: /* fslto */
+                        instr_count_inc(ARM_VFP_INSTRUCTION_UNKNOWN);
                         if (!arm_feature(env, ARM_FEATURE_VFP3))
                           return 1;
                         gen_vfp_slto(dp, 32 - rm);
                         break;
                     case 22: /* fuhto */
+                        instr_count_inc(ARM_VFP_INSTRUCTION_UNKNOWN);
                         if (!arm_feature(env, ARM_FEATURE_VFP3))
                           return 1;
                         gen_vfp_uhto(dp, 16 - rm);
                         break;
                     case 23: /* fulto */
+                        instr_count_inc(ARM_VFP_INSTRUCTION_UNKNOWN);
                         if (!arm_feature(env, ARM_FEATURE_VFP3))
                           return 1;
                         gen_vfp_ulto(dp, 32 - rm);
                         break;
                     case 24: /* ftoui */
+                        instr_count_inc(ARM_VFP_INSTRUCTION_FTOUID + (1 - dp));
                         gen_vfp_toui(dp);
                         break;
                     case 25: /* ftouiz */
+                        instr_count_inc(ARM_VFP_INSTRUCTION_FTOUIZD + (1 - dp));
                         gen_vfp_touiz(dp);
                         break;
                     case 26: /* ftosi */
+                        instr_count_inc(ARM_VFP_INSTRUCTION_FTOSID + (1 - dp));
                         gen_vfp_tosi(dp);
                         break;
                     case 27: /* ftosiz */
+                        instr_count_inc(ARM_VFP_INSTRUCTION_FTOSIZD + (1 - dp));
                         gen_vfp_tosiz(dp);
                         break;
                     case 28: /* ftosh */
+                        instr_count_inc(ARM_VFP_INSTRUCTION_UNKNOWN);
                         if (!arm_feature(env, ARM_FEATURE_VFP3))
                           return 1;
                         gen_vfp_tosh(dp, 16 - rm);
                         break;
                     case 29: /* ftosl */
+                        instr_count_inc(ARM_VFP_INSTRUCTION_UNKNOWN);
                         if (!arm_feature(env, ARM_FEATURE_VFP3))
                           return 1;
                         gen_vfp_tosl(dp, 32 - rm);
                         break;
                     case 30: /* ftouh */
+                        instr_count_inc(ARM_VFP_INSTRUCTION_UNKNOWN);
                         if (!arm_feature(env, ARM_FEATURE_VFP3))
                           return 1;
                         gen_vfp_touh(dp, 16 - rm);
                         break;
                     case 31: /* ftoul */
+                        instr_count_inc(ARM_VFP_INSTRUCTION_UNKNOWN);
                         if (!arm_feature(env, ARM_FEATURE_VFP3))
                           return 1;
                         gen_vfp_toul(dp, 32 - rm);
@@ -3247,6 +3674,7 @@ static int disas_vfp_insn(CPUState * env
             if (insn & ARM_CP_RW_BIT) {
                 /* vfp->arm */
                 if (dp) {
+                    instr_count_inc(ARM_VFP_INSTRUCTION_FMRRD);
                     gen_mov_F0_vreg(0, rm * 2);
                     tmp = gen_vfp_mrs();
                     store_reg(s, rd, tmp);
@@ -3254,6 +3682,7 @@ static int disas_vfp_insn(CPUState * env
                     tmp = gen_vfp_mrs();
                     store_reg(s, rn, tmp);
                 } else {
+                    instr_count_inc(ARM_VFP_INSTRUCTION_FMRRS);
                     gen_mov_F0_vreg(0, rm);
                     tmp = gen_vfp_mrs();
                     store_reg(s, rn, tmp);
@@ -3264,6 +3693,7 @@ static int disas_vfp_insn(CPUState * env
             } else {
                 /* arm->vfp */
                 if (dp) {
+                    instr_count_inc(ARM_VFP_INSTRUCTION_FMDRR);
                     tmp = load_reg(s, rd);
                     gen_vfp_msr(tmp);
                     gen_mov_vreg_F0(0, rm * 2);
@@ -3271,6 +3701,7 @@ static int disas_vfp_insn(CPUState * env
                     gen_vfp_msr(tmp);
                     gen_mov_vreg_F0(0, rm * 2 + 1);
                 } else {
+                    instr_count_inc(ARM_VFP_INSTRUCTION_FMSRR);
                     tmp = load_reg(s, rn);
                     gen_vfp_msr(tmp);
                     gen_mov_vreg_F0(0, rm);
@@ -3298,9 +3729,11 @@ static int disas_vfp_insn(CPUState * env
                     offset = -offset;
                 gen_op_addl_T1_im(offset);
                 if (insn & (1 << 20)) {
+                    instr_count_inc(ARM_VFP_INSTRUCTION_FLDD + (1 - dp));
                     gen_vfp_ld(s, dp);
                     gen_mov_vreg_F0(dp, rd);
                 } else {
+                    instr_count_inc(ARM_VFP_INSTRUCTION_FSTD + (1 - dp));
                     gen_mov_F0_vreg(dp, rd);
                     gen_vfp_st(s, dp);
                 }
@@ -3321,10 +3754,12 @@ static int disas_vfp_insn(CPUState * env
                 for (i = 0; i < n; i++) {
                     if (insn & ARM_CP_RW_BIT) {
                         /* load */
+                        instr_count_inc(ARM_VFP_INSTRUCTION_FLDMD + (1 - dp));
                         gen_vfp_ld(s, dp);
                         gen_mov_vreg_F0(dp, rd + i);
                     } else {
                         /* store */
+                        instr_count_inc(ARM_VFP_INSTRUCTION_FSTMD + (1 - dp));
                         gen_mov_F0_vreg(dp, rd + i);
                         gen_vfp_st(s, dp);
                     }
@@ -5697,6 +6132,7 @@ static void gen_logicq_cc(TCGv_i64 val)
 static void disas_arm_insn(CPUState * env, DisasContext *s)
 {
     unsigned int cond, insn, val, op1, i, shift, rm, rs, rn, rd, sh;
+    unsigned int instr_index = 0;
     TCGv tmp;
     TCGv tmp2;
     TCGv tmp3;
@@ -5706,14 +6142,18 @@ static void disas_arm_insn(CPUState * en
     insn = ldl_code(s->pc);
     s->pc += 4;
 
+    instr_count_inc_init(ARM_INSTRUCTION_COUNTER_OFFSET,
+            ARM_INSTRUCTION_NOT_INSTRUMENTED);
+
     /* M variants do not implement ARM mode.  */
     if (IS_M(env))
         goto illegal_op;
     cond = insn >> 28;
-    if (cond == 0xf){
+    if (cond == 0xf) {
         /* Unconditional instructions.  */
         if (((insn >> 25) & 7) == 1) {
             /* NEON Data processing.  */
+            instr_count_inc(ARM_INSTRUCTION_UNKNOWN);
             if (!arm_feature(env, ARM_FEATURE_NEON))
                 goto illegal_op;
 
@@ -5723,6 +6163,7 @@ static void disas_arm_insn(CPUState * en
         }
         if ((insn & 0x0f100000) == 0x04000000) {
             /* NEON load/store.  */
+            instr_count_inc(ARM_INSTRUCTION_UNKNOWN);
             if (!arm_feature(env, ARM_FEATURE_NEON))
                 goto illegal_op;
 
@@ -5734,6 +6175,7 @@ static void disas_arm_insn(CPUState * en
             return; /* PLD */
         else if ((insn & 0x0ffffdff) == 0x01010000) {
             ARCH(6);
+            instr_count_inc(ARM_INSTRUCTION_SETEND);
             /* setend */
             if (insn & (1 << 9)) {
                 /* BE8 mode not implemented.  */
@@ -5744,11 +6186,13 @@ static void disas_arm_insn(CPUState * en
             switch ((insn >> 4) & 0xf) {
             case 1: /* clrex */
                 ARCH(6K);
+                instr_count_inc(ARM_INSTRUCTION_UNKNOWN);
                 gen_helper_clrex(cpu_env);
                 return;
             case 4: /* dsb */
             case 5: /* dmb */
             case 6: /* isb */
+                instr_count_inc(ARM_INSTRUCTION_UNKNOWN);
                 ARCH(7);
                 /* We don't emulate caches so these are a no-op.  */
                 return;
@@ -5758,6 +6202,7 @@ static void disas_arm_insn(CPUState * en
         } else if ((insn & 0x0e5fffe0) == 0x084d0500) {
             /* srs */
             uint32_t offset;
+            instr_count_inc(ARM_INSTRUCTION_SRS);
             if (IS_USER(s))
                 goto illegal_op;
             ARCH(6);
@@ -5808,6 +6253,7 @@ static void disas_arm_insn(CPUState * en
             uint32_t offset;
             if (IS_USER(s))
                 goto illegal_op;
+            instr_count_inc(ARM_INSTRUCTION_RFE);
             ARCH(6);
             rn = (insn >> 16) & 0xf;
             addr = load_reg(s, rn);
@@ -5844,7 +6290,7 @@ static void disas_arm_insn(CPUState * en
         } else if ((insn & 0x0e000000) == 0x0a000000) {
             /* branch link and change to thumb (blx <offset>) */
             int32_t offset;
-
+            instr_count_inc(ARM_INSTRUCTION_BLX);
             val = (uint32_t)s->pc;
             tmp = new_tmp();
             tcg_gen_movi_i32(tmp, val);
@@ -5858,6 +6304,7 @@ static void disas_arm_insn(CPUState * en
             gen_bx_im(s, val);
             return;
         } else if ((insn & 0x0e000f00) == 0x0c000100) {
+            instr_count_inc(ARM_INSTRUCTION_UNKNOWN);
             if (arm_feature(env, ARM_FEATURE_IWMMXT)) {
                 /* iWMMXt register transfer.  */
                 if (env->cp15.c15_cpar & (1 << 1))
@@ -5865,13 +6312,16 @@ static void disas_arm_insn(CPUState * en
                         return;
             }
         } else if ((insn & 0x0fe00000) == 0x0c400000) {
+            instr_count_inc(ARM_INSTRUCTION_UNKNOWN);
             /* Coprocessor double register transfer.  */
         } else if ((insn & 0x0f000010) == 0x0e000010) {
+            instr_count_inc(ARM_INSTRUCTION_UNKNOWN);
             /* Additional coprocessor register transfer.  */
         } else if ((insn & 0x0ff10020) == 0x01000000) {
             uint32_t mask;
             uint32_t val;
             /* cps (privileged) */
+            instr_count_inc(ARM_INSTRUCTION_CPS);
             if (IS_USER(s))
                 return;
             mask = val = 0;
@@ -5911,10 +6361,12 @@ static void disas_arm_insn(CPUState * en
             val = ((insn >> 4) & 0xf000) | (insn & 0xfff);
             if ((insn & (1 << 22)) == 0) {
                 /* MOVW */
+                instr_count_inc(ARM_INSTRUCTION_UNKNOWN);
                 tmp = new_tmp();
                 tcg_gen_movi_i32(tmp, val);
             } else {
                 /* MOVT */
+                instr_count_inc(ARM_INSTRUCTION_UNKNOWN);
                 tmp = load_reg(s, rd);
                 tcg_gen_ext16u_i32(tmp, tmp);
                 tcg_gen_ori_i32(tmp, tmp, val << 16);
@@ -5924,9 +6376,11 @@ static void disas_arm_insn(CPUState * en
             if (((insn >> 12) & 0xf) != 0xf)
                 goto illegal_op;
             if (((insn >> 16) & 0xf) == 0) {
+                instr_count_inc(ARM_INSTRUCTION_UNKNOWN);
                 gen_nop_hint(s, insn & 0xff);
             } else {
                 /* CPSR = immediate */
+                instr_count_inc(ARM_INSTRUCTION_MSR);
                 val = insn & 0xff;
                 shift = ((insn >> 8) & 0xf) * 2;
                 if (shift)
@@ -5947,12 +6401,14 @@ static void disas_arm_insn(CPUState * en
         case 0x0: /* move program status register */
             if (op1 & 1) {
                 /* PSR = reg */
+                instr_count_inc(ARM_INSTRUCTION_MSR);
                 gen_movl_T0_reg(s, rm);
                 i = ((op1 & 2) != 0);
                 if (gen_set_psr_T0(s, msr_mask(env, s, (insn >> 16) & 0xf, i), i))
                     goto illegal_op;
             } else {
                 /* reg = PSR */
+                instr_count_inc(ARM_INSTRUCTION_MRS);
                 rd = (insn >> 12) & 0xf;
                 if (op1 & 2) {
                     if (IS_USER(s))
@@ -5968,10 +6424,12 @@ static void disas_arm_insn(CPUState * en
         case 0x1:
             if (op1 == 1) {
                 /* branch/exchange thumb (bx).  */
+                instr_count_inc(ARM_INSTRUCTION_BX);
                 tmp = load_reg(s, rm);
                 gen_bx(s, tmp);
             } else if (op1 == 3) {
                 /* clz */
+                instr_count_inc(ARM_INSTRUCTION_CLZ);
                 rd = (insn >> 12) & 0xf;
                 tmp = load_reg(s, rm);
                 gen_helper_clz(tmp, tmp);
@@ -5982,6 +6440,7 @@ static void disas_arm_insn(CPUState * en
             break;
         case 0x2:
             if (op1 == 1) {
+                instr_count_inc(ARM_INSTRUCTION_BXJ);
                 ARCH(5J); /* bxj */
                 /* Trivial implementation equivalent to bx.  */
                 tmp = load_reg(s, rm);
@@ -5994,6 +6453,7 @@ static void disas_arm_insn(CPUState * en
             if (op1 != 1)
               goto illegal_op;
 
+            instr_count_inc(ARM_INSTRUCTION_BLX);
             /* branch link/exchange thumb (blx) */
             tmp = load_reg(s, rm);
             tmp2 = new_tmp();
@@ -6006,16 +6466,24 @@ static void disas_arm_insn(CPUState * en
             rn = (insn >> 16) & 0xf;
             tmp = load_reg(s, rm);
             tmp2 = load_reg(s, rn);
-            if (op1 & 2)
+            if (op1 & 2) {
                 gen_helper_double_saturate(tmp2, tmp2);
-            if (op1 & 1)
+            }
+            /* Record exactly one counter per instruction (QD* or Q*). */
+            if (op1 & 1) {
                 gen_helper_sub_saturate(tmp, tmp, tmp2);
-            else
+                instr_count_inc((op1 & 2) ? ARM_INSTRUCTION_QDSUB
+                                          : ARM_INSTRUCTION_QSUB);
+            } else {
                 gen_helper_add_saturate(tmp, tmp, tmp2);
+                instr_count_inc((op1 & 2) ? ARM_INSTRUCTION_QDADD
+                                          : ARM_INSTRUCTION_QADD);
+            }
             dead_tmp(tmp2);
             store_reg(s, rd, tmp);
             break;
         case 7: /* bkpt */
+            instr_count_inc(ARM_INSTRUCTION_BKPT);
             gen_set_condexec(s);
             gen_set_pc_im(s->pc - 4);
             gen_exception(EXCP_BKPT);
@@ -6041,18 +6509,22 @@ static void disas_arm_insn(CPUState * en
                 tmp = new_tmp();
                 tcg_gen_trunc_i64_i32(tmp, tmp64);
                 if ((sh & 2) == 0) {
+                    instr_count_inc(ARM_INSTRUCTION_SMLAWY);
                     tmp2 = load_reg(s, rn);
                     gen_helper_add_setq(tmp, tmp, tmp2);
                     dead_tmp(tmp2);
                 }
+                else instr_count_inc(ARM_INSTRUCTION_SMULWY);
                 store_reg(s, rd, tmp);
             } else {
                 /* 16 * 16 */
+                if (op1 == 3) instr_count_inc(ARM_INSTRUCTION_SMULXY);
                 tmp = load_reg(s, rm);
                 tmp2 = load_reg(s, rs);
                 gen_mulxy(tmp, tmp2, sh & 2, sh & 4);
                 dead_tmp(tmp2);
                 if (op1 == 2) {
+                    instr_count_inc(ARM_INSTRUCTION_SMLALXY);
                     tmp64 = tcg_temp_new_i64();
                     tcg_gen_ext_i32_i64(tmp64, tmp);
                     dead_tmp(tmp);
@@ -6060,6 +6532,7 @@ static void disas_arm_insn(CPUState * en
                     gen_storeq_reg(s, rn, rd, tmp64);
                 } else {
                     if (op1 == 0) {
+                        instr_count_inc(ARM_INSTRUCTION_SMLAXY);
                         tmp2 = load_reg(s, rn);
                         gen_helper_add_setq(tmp, tmp, tmp2);
                         dead_tmp(tmp2);
@@ -6111,18 +6584,21 @@ static void disas_arm_insn(CPUState * en
         rd = (insn >> 12) & 0xf;
         switch(op1) {
         case 0x00:
+            instr_count_inc(ARM_INSTRUCTION_AND);
             gen_op_andl_T0_T1();
             gen_movl_reg_T0(s, rd);
             if (logic_cc)
                 gen_op_logic_T0_cc();
             break;
         case 0x01:
+            instr_count_inc(ARM_INSTRUCTION_EOR);
             gen_op_xorl_T0_T1();
             gen_movl_reg_T0(s, rd);
             if (logic_cc)
                 gen_op_logic_T0_cc();
             break;
         case 0x02:
+            instr_count_inc(ARM_INSTRUCTION_SUB);
             if (set_cc && rd == 15) {
                 /* SUBS r15, ... is used for exception return.  */
                 if (IS_USER(s))
@@ -6138,6 +6614,7 @@ static void disas_arm_insn(CPUState * en
             }
             break;
         case 0x03:
+            instr_count_inc(ARM_INSTRUCTION_RSB);
             if (set_cc)
                 gen_op_rsbl_T0_T1_cc();
             else
@@ -6145,6 +6622,7 @@ static void disas_arm_insn(CPUState * en
             gen_movl_reg_T0(s, rd);
             break;
         case 0x04:
+            instr_count_inc(ARM_INSTRUCTION_ADD);
             if (set_cc)
                 gen_op_addl_T0_T1_cc();
             else
@@ -6152,6 +6630,7 @@ static void disas_arm_insn(CPUState * en
             gen_movl_reg_T0(s, rd);
             break;
         case 0x05:
+            instr_count_inc(ARM_INSTRUCTION_ADC);
             if (set_cc)
                 gen_op_adcl_T0_T1_cc();
             else
@@ -6159,6 +6638,7 @@ static void disas_arm_insn(CPUState * en
             gen_movl_reg_T0(s, rd);
             break;
         case 0x06:
+            instr_count_inc(ARM_INSTRUCTION_SBC);
             if (set_cc)
                 gen_op_sbcl_T0_T1_cc();
             else
@@ -6166,6 +6646,7 @@ static void disas_arm_insn(CPUState * en
             gen_movl_reg_T0(s, rd);
             break;
         case 0x07:
+            instr_count_inc(ARM_INSTRUCTION_RSC);
             if (set_cc)
                 gen_op_rscl_T0_T1_cc();
             else
@@ -6173,34 +6654,40 @@ static void disas_arm_insn(CPUState * en
             gen_movl_reg_T0(s, rd);
             break;
         case 0x08:
+            instr_count_inc(ARM_INSTRUCTION_TST);
             if (set_cc) {
                 gen_op_andl_T0_T1();
                 gen_op_logic_T0_cc();
             }
             break;
         case 0x09:
+            instr_count_inc(ARM_INSTRUCTION_TEQ);
             if (set_cc) {
                 gen_op_xorl_T0_T1();
                 gen_op_logic_T0_cc();
             }
             break;
         case 0x0a:
+            instr_count_inc(ARM_INSTRUCTION_CMP);
             if (set_cc) {
                 gen_op_subl_T0_T1_cc();
             }
             break;
         case 0x0b:
+            instr_count_inc(ARM_INSTRUCTION_CMN);
             if (set_cc) {
                 gen_op_addl_T0_T1_cc();
             }
             break;
         case 0x0c:
+            instr_count_inc(ARM_INSTRUCTION_ORR);
             gen_op_orl_T0_T1();
             gen_movl_reg_T0(s, rd);
             if (logic_cc)
                 gen_op_logic_T0_cc();
             break;
         case 0x0d:
+            instr_count_inc(ARM_INSTRUCTION_MOV);
             if (logic_cc && rd == 15) {
                 /* MOVS r15, ... is used for exception return.  */
                 if (IS_USER(s))
@@ -6214,6 +6701,7 @@ static void disas_arm_insn(CPUState * en
             }
             break;
         case 0x0e:
+            instr_count_inc(ARM_INSTRUCTION_BIC);
             gen_op_bicl_T0_T1();
             gen_movl_reg_T0(s, rd);
             if (logic_cc)
@@ -6221,6 +6709,7 @@ static void disas_arm_insn(CPUState * en
             break;
         default:
         case 0x0f:
+            instr_count_inc(ARM_INSTRUCTION_MVN);
             gen_op_notl_T1();
             gen_movl_reg_T1(s, rd);
             if (logic_cc)
@@ -6245,43 +6734,63 @@ static void disas_arm_insn(CPUState * en
                     switch (op1) {
                     case 0: case 1: case 2: case 3: case 6:
                         /* 32 bit mul */
+                        instr_index = ARM_INSTRUCTION_MUL;
                         tmp = load_reg(s, rs);
                         tmp2 = load_reg(s, rm);
                         tcg_gen_mul_i32(tmp, tmp, tmp2);
                         dead_tmp(tmp2);
                         if (insn & (1 << 22)) {
                             /* Subtract (mls) */
+                            instr_index = ARM_INSTRUCTION_UNKNOWN;
                             ARCH(6T2);
                             tmp2 = load_reg(s, rn);
                             tcg_gen_sub_i32(tmp, tmp2, tmp);
                             dead_tmp(tmp2);
                         } else if (insn & (1 << 21)) {
                             /* Add */
+                            instr_index = ARM_INSTRUCTION_MLA;
                             tmp2 = load_reg(s, rn);
                             tcg_gen_add_i32(tmp, tmp, tmp2);
                             dead_tmp(tmp2);
                         }
-                        if (insn & (1 << 20))
+                        if (insn & (1 << 20)) {
                             gen_logic_CC(tmp);
+                            instr_index++; /* MULS and MLAS */
+                        }
+                        instr_count_inc(instr_index);
                         store_reg(s, rd, tmp);
                         break;
                     default:
                         /* 64 bit mul */
                         tmp = load_reg(s, rs);
                         tmp2 = load_reg(s, rm);
-                        if (insn & (1 << 22))
+                        if (insn & (1 << 22)) {
                             tmp64 = gen_muls_i64_i32(tmp, tmp2);
-                        else
+                            instr_index = ARM_INSTRUCTION_SMULL;
+                        }
+                        else {
                             tmp64 = gen_mulu_i64_i32(tmp, tmp2);
-                        if (insn & (1 << 21)) /* mult accumulate */
+                            instr_index = ARM_INSTRUCTION_UMULL;
+                        }
+                        if (insn & (1 << 21)) {
+                            /* mult accumulate */
                             gen_addq(s, tmp64, rn, rd);
+                            if (insn & (1 << 22)) {
+                                instr_index = ARM_INSTRUCTION_SMLAL;
+                            } else {
+                                instr_index = ARM_INSTRUCTION_UMLAL;
+                            }
+                        }
                         if (!(insn & (1 << 23))) { /* double accumulate */
                             ARCH(6);
                             gen_addq_lo(s, tmp64, rn);
                             gen_addq_lo(s, tmp64, rd);
                         }
-                        if (insn & (1 << 20))
+                        if (insn & (1 << 20)) {
                             gen_logicq_cc(tmp64);
+                            instr_index++; /* SMULLS, UMULLS, SMLALS, UMLALS */
+                        }
+                        instr_count_inc(instr_index);
                         gen_storeq_reg(s, rn, rd, tmp64);
                         break;
                     }
@@ -6300,6 +6809,7 @@ static void disas_arm_insn(CPUState * en
                         if (insn & (1 << 20)) {
                             gen_helper_mark_exclusive(cpu_env, cpu_T[1]);
+                            instr_count_inc(ARM_INSTRUCTION_LDREX);
                             switch (op1) {
                             case 0: /* ldrex */
                                 tmp = gen_ld32(addr, IS_USER(s));
                                 break;
@@ -6327,6 +6837,7 @@ static void disas_arm_insn(CPUState * en
                             tcg_gen_brcondi_i32(TCG_COND_NE, cpu_T[0],
                                                 0, label);
                             tmp = load_reg(s,rm);
+                            instr_count_inc(ARM_INSTRUCTION_STREX);
                             switch (op1) {
                             case 0:  /*  strex */
                                 gen_st32(tmp, addr, IS_USER(s));
@@ -6359,9 +6870,11 @@ static void disas_arm_insn(CPUState * en
                         addr = load_reg(s, rn);
                         tmp = load_reg(s, rm);
                         if (insn & (1 << 22)) {
+                            instr_count_inc(ARM_INSTRUCTION_SWPB);
                             tmp2 = gen_ld8u(addr, IS_USER(s));
                             gen_st8(tmp, addr, IS_USER(s));
                         } else {
+                            instr_count_inc(ARM_INSTRUCTION_SWP);
                             tmp2 = gen_ld32(addr, IS_USER(s));
                             gen_st32(tmp, addr, IS_USER(s));
                         }
@@ -6383,13 +6896,16 @@ static void disas_arm_insn(CPUState * en
                     /* load */
                     switch(sh) {
                     case 1:
+                        instr_count_inc(ARM_INSTRUCTION_LDRH);
                         tmp = gen_ld16u(addr, IS_USER(s));
                         break;
                     case 2:
+                        instr_count_inc(ARM_INSTRUCTION_LDRSB);
                         tmp = gen_ld8s(addr, IS_USER(s));
                         break;
                     default:
                     case 3:
+                        instr_count_inc(ARM_INSTRUCTION_LDRSH);
                         tmp = gen_ld16s(addr, IS_USER(s));
                         break;
                     }
@@ -6398,6 +6914,7 @@ static void disas_arm_insn(CPUState * en
                     /* doubleword */
                     if (sh & 1) {
                         /* store */
+                        instr_count_inc(ARM_INSTRUCTION_STRD);
                         tmp = load_reg(s, rd);
                         gen_st32(tmp, addr, IS_USER(s));
                         tcg_gen_addi_i32(addr, addr, 4);
@@ -6406,6 +6923,7 @@ static void disas_arm_insn(CPUState * en
                         load = 0;
                     } else {
                         /* load */
+                        instr_count_inc(ARM_INSTRUCTION_LDRD);
                         tmp = gen_ld32(addr, IS_USER(s));
                         store_reg(s, rd, tmp);
                         tcg_gen_addi_i32(addr, addr, 4);
@@ -6417,6 +6935,7 @@ static void disas_arm_insn(CPUState * en
                 } else {
                     /* store */
                     tmp = load_reg(s, rd);
+                    instr_count_inc(ARM_INSTRUCTION_STRH);
                     gen_st16(tmp, addr, IS_USER(s));
                     load = 0;
                 }
@@ -6454,6 +6973,7 @@ static void disas_arm_insn(CPUState * en
                 rs = (insn >> 8) & 0xf;
                 switch ((insn >> 23) & 3) {
                 case 0: /* Parallel add/subtract.  */
+                    instr_count_inc(ARM_INSTRUCTION_UNKNOWN);
                     op1 = (insn >> 20) & 7;
                     tmp = load_reg(s, rn);
                     tmp2 = load_reg(s, rm);
@@ -6472,6 +6992,7 @@ static void disas_arm_insn(CPUState * en
                         shift = (insn >> 7) & 0x1f;
                         if (insn & (1 << 6)) {
                             /* pkhtb */
+                            instr_count_inc(ARM_INSTRUCTION_PKHTB);
                             if (shift == 0)
                                 shift = 31;
                             tcg_gen_sari_i32(tmp2, tmp2, shift);
@@ -6479,6 +7000,7 @@ static void disas_arm_insn(CPUState * en
                             tcg_gen_ext16u_i32(tmp2, tmp2);
                         } else {
                             /* pkhbt */
+                            instr_count_inc(ARM_INSTRUCTION_PKHBT);
                             if (shift)
                                 tcg_gen_shli_i32(tmp2, tmp2, shift);
                             tcg_gen_ext16u_i32(tmp, tmp);
@@ -6500,10 +7022,14 @@ static void disas_arm_insn(CPUState * en
                         }
                         sh = (insn >> 16) & 0x1f;
                         if (sh != 0) {
-                            if (insn & (1 << 22))
+                            if (insn & (1 << 22)) {
+                                instr_count_inc(ARM_INSTRUCTION_USAT);
                                 gen_helper_usat(tmp, tmp, tcg_const_i32(sh));
-                            else
+                            }
+                            else {
+                                instr_count_inc(ARM_INSTRUCTION_SSAT);
                                 gen_helper_ssat(tmp, tmp, tcg_const_i32(sh));
+                            }
                         }
                         store_reg(s, rd, tmp);
                     } else if ((insn & 0x00300fe0) == 0x00200f20) {
@@ -6511,14 +7037,19 @@ static void disas_arm_insn(CPUState * en
                         tmp = load_reg(s, rm);
                         sh = (insn >> 16) & 0x1f;
                         if (sh != 0) {
-                            if (insn & (1 << 22))
+                            if (insn & (1 << 22)) {
+                                instr_count_inc(ARM_INSTRUCTION_USAT16);
                                 gen_helper_usat16(tmp, tmp, tcg_const_i32(sh));
-                            else
+                            }
+                            else {
+                                instr_count_inc(ARM_INSTRUCTION_SSAT16);
                                 gen_helper_ssat16(tmp, tmp, tcg_const_i32(sh));
+                            }
                         }
                         store_reg(s, rd, tmp);
                     } else if ((insn & 0x00700fe0) == 0x00000fa0) {
                         /* Select bytes.  */
+                        instr_count_inc(ARM_INSTRUCTION_SEL);
                         tmp = load_reg(s, rn);
                         tmp2 = load_reg(s, rm);
                         tmp3 = new_tmp();
@@ -6536,12 +7067,30 @@ static void disas_arm_insn(CPUState * en
                             tcg_gen_rori_i32(tmp, tmp, shift * 8);
                         op1 = (insn >> 20) & 7;
                         switch (op1) {
-                        case 0: gen_sxtb16(tmp);  break;
-                        case 2: gen_sxtb(tmp);    break;
-                        case 3: gen_sxth(tmp);    break;
-                        case 4: gen_uxtb16(tmp);  break;
-                        case 6: gen_uxtb(tmp);    break;
-                        case 7: gen_uxth(tmp);    break;
+                        case 0:
+                            instr_index = ARM_INSTRUCTION_SXTB16;
+                            gen_sxtb16(tmp);
+                            break;
+                        case 2:
+                            instr_index = ARM_INSTRUCTION_SXTB;
+                            gen_sxtb(tmp);
+                            break;
+                        case 3:
+                            instr_index = ARM_INSTRUCTION_SXTH;
+                            gen_sxth(tmp);
+                            break;
+                        case 4:
+                            instr_index = ARM_INSTRUCTION_UXTB16;
+                            gen_uxtb16(tmp);
+                            break;
+                        case 6:
+                            instr_index = ARM_INSTRUCTION_UXTB;
+                            gen_uxtb(tmp);
+                            break;
+                        case 7:
+                            instr_index = ARM_INSTRUCTION_UXTH;
+                            gen_uxth(tmp);
+                            break;
                         default: goto illegal_op;
                         }
                         if (rn != 15) {
@@ -6552,23 +7101,30 @@ static void disas_arm_insn(CPUState * en
                                 tcg_gen_add_i32(tmp, tmp, tmp2);
                                 dead_tmp(tmp2);
                             }
+                            instr_index -= 3; /* add variants */
                         }
+                        instr_count_inc(instr_index);
                         store_reg(s, rd, tmp);
                     } else if ((insn & 0x003f0f60) == 0x003f0f20) {
                         /* rev */
                         tmp = load_reg(s, rm);
                         if (insn & (1 << 22)) {
                             if (insn & (1 << 7)) {
+                                instr_count_inc(ARM_INSTRUCTION_REVSH);
                                 gen_revsh(tmp);
                             } else {
                                 ARCH(6T2);
                                 gen_helper_rbit(tmp, tmp);
                             }
                         } else {
-                            if (insn & (1 << 7))
+                            if (insn & (1 << 7)) {
+                                instr_count_inc(ARM_INSTRUCTION_REV16);
                                 gen_rev16(tmp);
-                            else
+                            }
+                            else {
+                                instr_count_inc(ARM_INSTRUCTION_REV);
                                 tcg_gen_bswap_i32(tmp, tmp);
+                            }
                         }
                         store_reg(s, rd, tmp);
                     } else {
@@ -6590,11 +7146,16 @@ static void disas_arm_insn(CPUState * en
                             tmp2 = load_reg(s, rd);
                             if (insn & (1 << 6)) {
                                 tcg_gen_sub_i32(tmp, tmp, tmp2);
+                                instr_count_inc(ARM_INSTRUCTION_SMMLS);
                             } else {
                                 tcg_gen_add_i32(tmp, tmp, tmp2);
+                                instr_count_inc(ARM_INSTRUCTION_SMMLA);
                             }
                             dead_tmp(tmp2);
                         }
+                        else {
+                            instr_count_inc(ARM_INSTRUCTION_SMMUL);
+                        }
                         store_reg(s, rn, tmp);
                     } else {
                         if (insn & (1 << 5))
@@ -6603,8 +7164,10 @@ static void disas_arm_insn(CPUState * en
                         /* This addition cannot overflow.  */
                         if (insn & (1 << 6)) {
                             tcg_gen_sub_i32(tmp, tmp, tmp2);
+                            instr_index = 1;
                         } else {
                             tcg_gen_add_i32(tmp, tmp, tmp2);
+                            instr_index = 0;
                         }
                         dead_tmp(tmp2);
                         if (insn & (1 << 22)) {
@@ -6614,6 +7177,8 @@ static void disas_arm_insn(CPUState * en
                             dead_tmp(tmp);
                             gen_addq(s, tmp64, rd, rn);
                             gen_storeq_reg(s, rd, rn, tmp64);
+                            instr_index += ARM_INSTRUCTION_SMLALD;
+                            instr_count_inc(instr_index);
                         } else {
                             /* smuad, smusd, smlad, smlsd */
                             if (rd != 15)
@@ -6621,8 +7186,11 @@ static void disas_arm_insn(CPUState * en
                                 tmp2 = load_reg(s, rd);
                                 gen_helper_add_setq(tmp, tmp, tmp2);
                                 dead_tmp(tmp2);
+                                instr_index += 2; /* SMLAD, SMLSD */
                               }
                             store_reg(s, rn, tmp);
+                            instr_index += ARM_INSTRUCTION_SMUAD;
+                            instr_count_inc(instr_index);
                         }
                     }
                     break;
@@ -6639,12 +7207,15 @@ static void disas_arm_insn(CPUState * en
                             tmp2 = load_reg(s, rd);
                             tcg_gen_add_i32(tmp, tmp, tmp2);
                             dead_tmp(tmp2);
+                            instr_count_inc(ARM_INSTRUCTION_USADA8);
                         }
+                        else instr_count_inc(ARM_INSTRUCTION_USAD8);
                         store_reg(s, rn, tmp);
                         break;
                     case 0x20: case 0x24: case 0x28: case 0x2c:
                         /* Bitfield insert/clear.  */
                         ARCH(6T2);
+                        instr_count_inc(ARM_INSTRUCTION_UNKNOWN);
                         shift = (insn >> 7) & 0x1f;
                         i = (insn >> 16) & 0x1f;
                         i = i + 1 - shift;
@@ -6664,6 +7235,7 @@ static void disas_arm_insn(CPUState * en
                     case 0x12: case 0x16: case 0x1a: case 0x1e: /* sbfx */
                     case 0x32: case 0x36: case 0x3a: case 0x3e: /* ubfx */
                         ARCH(6T2);
+                        instr_count_inc(ARM_INSTRUCTION_UNKNOWN);
                         tmp = load_reg(s, rm);
                         shift = (insn >> 7) & 0x1f;
                         i = ((insn >> 16) & 0x1f) + 1;
@@ -6705,17 +7277,23 @@ static void disas_arm_insn(CPUState * en
             if (insn & (1 << 20)) {
                 /* load */
                 if (insn & (1 << 22)) {
+                    instr_count_inc(ARM_INSTRUCTION_LDRB);
                     tmp = gen_ld8u(tmp2, i);
                 } else {
+                    instr_count_inc(ARM_INSTRUCTION_LDR);
                     tmp = gen_ld32(tmp2, i);
                 }
             } else {
                 /* store */
                 tmp = load_reg(s, rd);
-                if (insn & (1 << 22))
+                if (insn & (1 << 22)) {
+                    instr_count_inc(ARM_INSTRUCTION_STRB);
                     gen_st8(tmp, tmp2, i);
-                else
+                }
+                else {
+                    instr_count_inc(ARM_INSTRUCTION_STR);
                     gen_st32(tmp, tmp2, i);
+                }
             }
             if (!(insn & (1 << 24))) {
                 gen_add_data_offset(s, insn, tmp2);
@@ -6740,6 +7318,18 @@ static void disas_arm_insn(CPUState * en
                 TCGv loaded_var;
                 /* load/store multiple words */
                 /* XXX: store correct base if write back */
+                switch ((insn & 0x00500000) >> 20) { /* bit 22 (S), bit 20 (L) */
+                case 0x0: instr_count_inc(ARM_INSTRUCTION_STM1); break;
+                case 0x1: instr_count_inc(ARM_INSTRUCTION_LDM1); break;
+                case 0x4: instr_count_inc(ARM_INSTRUCTION_STM2); break;
+                case 0x5:
+                    if (insn & (1 << 15)) {
+                        instr_count_inc(ARM_INSTRUCTION_LDM3);
+                    } else {
+                        instr_count_inc(ARM_INSTRUCTION_LDM2);
+                    }
+                    break;
+                }
                 user = 0;
                 if (insn & (1 << 22)) {
                     if (IS_USER(s))
@@ -6854,14 +7444,15 @@ static void disas_arm_insn(CPUState * en
         case 0xb:
             {
                 int32_t offset;
-
                 /* branch (and link) */
                 val = (int32_t)s->pc;
                 if (insn & (1 << 24)) {
+                    instr_count_inc(ARM_INSTRUCTION_B);
                     tmp = new_tmp();
                     tcg_gen_movi_i32(tmp, val);
                     store_reg(s, 14, tmp);
                 }
+                else instr_count_inc(ARM_INSTRUCTION_B);
                 offset = (((int32_t)insn << 8) >> 8);
                 val += (offset << 2) + 4;
                 gen_jmp(s, val);
@@ -6871,11 +7462,13 @@ static void disas_arm_insn(CPUState * en
         case 0xd:
         case 0xe:
             /* Coprocessor.  */
+            instr_count_inc(ARM_INSTRUCTION_COPROCESSOR);
             if (disas_coproc_insn(env, s, insn))
                 goto illegal_op;
             break;
         case 0xf:
             /* swi */
+            instr_count_inc(ARM_INSTRUCTION_SWI);
             gen_set_pc_im(s->pc);
             s->is_jmp = DISAS_SWI;
             break;
@@ -6988,6 +7581,9 @@ static int disas_thumb2_insn(CPUState *e
     int conds;
     int logic_cc;
 
+    instr_count_inc_init(ARM_THUMB_INSTRUCTION_COUNTER_OFFSET,
+            ARM_THUMB_INSTRUCTION_NOT_INSTRUMENTED);
+
     if (!(arm_feature(env, ARM_FEATURE_THUMB2)
           || arm_feature (env, ARM_FEATURE_M))) {
         /* Thumb-1 cores may need to treat bl and blx as a pair of
@@ -7601,10 +8197,12 @@ static int disas_thumb2_insn(CPUState *e
                 if (insn & (1 << 12)) {
                     /* b/bl */
                     gen_jmp(s, offset);
+                    instr_count_inc(ARM_THUMB_INSTRUCTION_BL);
                 } else {
                     /* blx */
                     offset &= ~(uint32_t)2;
                     gen_bx_im(s, offset);
+                    instr_count_inc(ARM_THUMB_INSTRUCTION_BLX1);
                 }
             } else if (((insn >> 23) & 7) == 7) {
                 /* Misc control */
@@ -7995,13 +8593,19 @@ illegal_op:
 
 static void disas_thumb_insn(CPUState *env, DisasContext *s)
 {
-    uint32_t val, insn, op, rm, rn, rd, shift, cond;
+    uint32_t val, insn, op, rm, rn, rd, shift, cond, instr_index;
     int32_t offset;
     int i;
     TCGv tmp;
     TCGv tmp2;
     TCGv addr;
 
+    instr_index = 0;
+
+    instr_count_inc_init(ARM_THUMB_INSTRUCTION_COUNTER_OFFSET,
+        ARM_THUMB_INSTRUCTION_NOT_INSTRUMENTED);
+
+
     if (s->condexec_mask) {
         cond = s->condexec_cond;
         s->condlabel = gen_new_label();
@@ -8022,18 +8626,26 @@ static void disas_thumb_insn(CPUState *e
             gen_movl_T0_reg(s, rn);
             if (insn & (1 << 10)) {
                 /* immediate */
+                instr_index = 0;
                 gen_op_movl_T1_im((insn >> 6) & 7);
             } else {
                 /* reg */
+                instr_index = 2; /* ADD3 / SUB3  */
                 rm = (insn >> 6) & 7;
                 gen_movl_T1_reg(s, rm);
             }
             if (insn & (1 << 9)) {
+                instr_count_inc(ARM_THUMB_INSTRUCTION_SUB1 + instr_index);
                 if (s->condexec_mask)
                     gen_op_subl_T0_T1();
                 else
                     gen_op_subl_T0_T1_cc();
             } else {
+                if (instr_index != 0 || ((insn >> 6) & 7)) {
+                    instr_count_inc(ARM_THUMB_INSTRUCTION_ADD1 + instr_index);
+                } else { /* immediate form with #0 only: ADD Rd, Rn, #0 */
+                    instr_count_inc(ARM_THUMB_INSTRUCTION_MOV2);
+                }
                 if (s->condexec_mask)
                     gen_op_addl_T0_T1();
                 else
@@ -8042,6 +8654,17 @@ static void disas_thumb_insn(CPUState *e
             gen_movl_reg_T0(s, rd);
         } else {
             /* shift immediate */
+            switch (op) {
+            case 0x0:
+                instr_count_inc(ARM_THUMB_INSTRUCTION_LSL1);
+                break;
+            case 0x1:
+                instr_count_inc(ARM_THUMB_INSTRUCTION_LSR1);
+                break;
+            case 0x2:
+                instr_count_inc(ARM_THUMB_INSTRUCTION_ASR1);
+                break;
+            }
             rm = (insn >> 3) & 7;
             shift = (insn >> 6) & 0x1f;
             tmp = load_reg(s, rm);
@@ -8063,19 +8686,23 @@ static void disas_thumb_insn(CPUState *e
         }
         switch (op) {
         case 0: /* mov */
+            instr_count_inc(ARM_THUMB_INSTRUCTION_MOV1);
             if (!s->condexec_mask)
                 gen_op_logic_T0_cc();
             break;
         case 1: /* cmp */
+            instr_count_inc(ARM_THUMB_INSTRUCTION_CMP1);
             gen_op_subl_T0_T1_cc();
             break;
         case 2: /* add */
+            instr_count_inc(ARM_THUMB_INSTRUCTION_ADD2);
             if (s->condexec_mask)
                 gen_op_addl_T0_T1();
             else
                 gen_op_addl_T0_T1_cc();
             break;
         case 3: /* sub */
+            instr_count_inc(ARM_THUMB_INSTRUCTION_SUB2);
             if (s->condexec_mask)
                 gen_op_subl_T0_T1();
             else
@@ -8087,6 +8714,7 @@ static void disas_thumb_insn(CPUState *e
         break;
     case 4:
         if (insn & (1 << 11)) {
+            instr_count_inc(ARM_THUMB_INSTRUCTION_LDR3);
             rd = (insn >> 8) & 7;
             /* load pc-relative.  Bit 1 of PC is ignored.  */
             val = s->pc + 2 + ((insn & 0xff) * 4);
@@ -8105,23 +8733,28 @@ static void disas_thumb_insn(CPUState *e
             op = (insn >> 8) & 3;
             switch (op) {
             case 0: /* add */
+                instr_count_inc(ARM_THUMB_INSTRUCTION_ADD4);
                 gen_movl_T0_reg(s, rd);
                 gen_movl_T1_reg(s, rm);
                 gen_op_addl_T0_T1();
                 gen_movl_reg_T0(s, rd);
                 break;
             case 1: /* cmp */
+                instr_count_inc(ARM_THUMB_INSTRUCTION_CMP3);
                 gen_movl_T0_reg(s, rd);
                 gen_movl_T1_reg(s, rm);
                 gen_op_subl_T0_T1_cc();
                 break;
             case 2: /* mov/cpy */
+                instr_count_inc(ARM_THUMB_INSTRUCTION_MOV3);
                 gen_movl_T0_reg(s, rm);
                 gen_movl_reg_T0(s, rd);
                 break;
             case 3:/* branch [and link] exchange thumb register */
+                instr_count_inc(ARM_THUMB_INSTRUCTION_BX);
                 tmp = load_reg(s, rm);
                 if (insn & (1 << 7)) {
+                    instr_count_inc(ARM_THUMB_INSTRUCTION_BLX2);
                     val = (uint32_t)s->pc | 1;
                     tmp2 = new_tmp();
                     tcg_gen_movi_i32(tmp2, val);
@@ -8155,16 +8788,19 @@ static void disas_thumb_insn(CPUState *e
         gen_movl_T1_reg(s, rm);
         switch (op) {
         case 0x0: /* and */
+            instr_count_inc(ARM_THUMB_INSTRUCTION_AND);
             gen_op_andl_T0_T1();
             if (!s->condexec_mask)
                 gen_op_logic_T0_cc();
             break;
         case 0x1: /* eor */
+            instr_count_inc(ARM_THUMB_INSTRUCTION_EOR);
             gen_op_xorl_T0_T1();
             if (!s->condexec_mask)
                 gen_op_logic_T0_cc();
             break;
         case 0x2: /* lsl */
+            instr_count_inc(ARM_THUMB_INSTRUCTION_LSL2);
             if (s->condexec_mask) {
                 gen_helper_shl(cpu_T[1], cpu_T[1], cpu_T[0]);
             } else {
@@ -8173,6 +8809,7 @@ static void disas_thumb_insn(CPUState *e
             }
             break;
         case 0x3: /* lsr */
+            instr_count_inc(ARM_THUMB_INSTRUCTION_LSR2);
             if (s->condexec_mask) {
                 gen_helper_shr(cpu_T[1], cpu_T[1], cpu_T[0]);
             } else {
@@ -8181,6 +8818,7 @@ static void disas_thumb_insn(CPUState *e
             }
             break;
         case 0x4: /* asr */
+            instr_count_inc(ARM_THUMB_INSTRUCTION_ASR2);
             if (s->condexec_mask) {
                 gen_helper_sar(cpu_T[1], cpu_T[1], cpu_T[0]);
             } else {
@@ -8189,18 +8827,21 @@ static void disas_thumb_insn(CPUState *e
             }
             break;
         case 0x5: /* adc */
+            instr_count_inc(ARM_THUMB_INSTRUCTION_ADC);
             if (s->condexec_mask)
                 gen_adc_T0_T1();
             else
                 gen_op_adcl_T0_T1_cc();
             break;
         case 0x6: /* sbc */
+            instr_count_inc(ARM_THUMB_INSTRUCTION_SBC);
             if (s->condexec_mask)
                 gen_sbc_T0_T1();
             else
                 gen_op_sbcl_T0_T1_cc();
             break;
         case 0x7: /* ror */
+            instr_count_inc(ARM_THUMB_INSTRUCTION_ROR);
             if (s->condexec_mask) {
                 gen_helper_ror(cpu_T[1], cpu_T[1], cpu_T[0]);
             } else {
@@ -8209,40 +8850,48 @@ static void disas_thumb_insn(CPUState *e
             }
             break;
         case 0x8: /* tst */
+            instr_count_inc(ARM_THUMB_INSTRUCTION_TST);
             gen_op_andl_T0_T1();
             gen_op_logic_T0_cc();
             rd = 16;
             break;
         case 0x9: /* neg */
+            instr_count_inc(ARM_THUMB_INSTRUCTION_NEG);
             if (s->condexec_mask)
                 tcg_gen_neg_i32(cpu_T[0], cpu_T[1]);
             else
                 gen_op_subl_T0_T1_cc();
             break;
         case 0xa: /* cmp */
+            instr_count_inc(ARM_THUMB_INSTRUCTION_CMP2);
             gen_op_subl_T0_T1_cc();
             rd = 16;
             break;
         case 0xb: /* cmn */
+            instr_count_inc(ARM_THUMB_INSTRUCTION_CMN);
             gen_op_addl_T0_T1_cc();
             rd = 16;
             break;
         case 0xc: /* orr */
+            instr_count_inc(ARM_THUMB_INSTRUCTION_ORR);
             gen_op_orl_T0_T1();
             if (!s->condexec_mask)
                 gen_op_logic_T0_cc();
             break;
         case 0xd: /* mul */
+            instr_count_inc(ARM_THUMB_INSTRUCTION_MUL);
             gen_op_mull_T0_T1();
             if (!s->condexec_mask)
                 gen_op_logic_T0_cc();
             break;
         case 0xe: /* bic */
+            instr_count_inc(ARM_THUMB_INSTRUCTION_BIC);
             gen_op_bicl_T0_T1();
             if (!s->condexec_mask)
                 gen_op_logic_T0_cc();
             break;
         case 0xf: /* mvn */
+            instr_count_inc(ARM_THUMB_INSTRUCTION_MVN);
             gen_op_notl_T1();
             if (!s->condexec_mask)
                 gen_op_logic_T1_cc();
@@ -8274,27 +8923,35 @@ static void disas_thumb_insn(CPUState *e
 
         switch (op) {
         case 0: /* str */
+            instr_count_inc(ARM_THUMB_INSTRUCTION_STR2);
             gen_st32(tmp, addr, IS_USER(s));
             break;
         case 1: /* strh */
+            instr_count_inc(ARM_THUMB_INSTRUCTION_STRH2);
             gen_st16(tmp, addr, IS_USER(s));
             break;
         case 2: /* strb */
+            instr_count_inc(ARM_THUMB_INSTRUCTION_STRB2);
             gen_st8(tmp, addr, IS_USER(s));
             break;
         case 3: /* ldrsb */
+            instr_count_inc(ARM_THUMB_INSTRUCTION_LDRSB);
             tmp = gen_ld8s(addr, IS_USER(s));
             break;
         case 4: /* ldr */
+            instr_count_inc(ARM_THUMB_INSTRUCTION_LDR2);
             tmp = gen_ld32(addr, IS_USER(s));
             break;
         case 5: /* ldrh */
+            instr_count_inc(ARM_THUMB_INSTRUCTION_LDRH2);
             tmp = gen_ld16u(addr, IS_USER(s));
             break;
         case 6: /* ldrb */
+            instr_count_inc(ARM_THUMB_INSTRUCTION_LDRB2);
             tmp = gen_ld8u(addr, IS_USER(s));
             break;
         case 7: /* ldrsh */
+            instr_count_inc(ARM_THUMB_INSTRUCTION_LDRSH);
             tmp = gen_ld16s(addr, IS_USER(s));
             break;
         }
@@ -8313,10 +8970,12 @@ static void disas_thumb_insn(CPUState *e
 
         if (insn & (1 << 11)) {
             /* load */
+            instr_count_inc(ARM_THUMB_INSTRUCTION_LDR1);
             tmp = gen_ld32(addr, IS_USER(s));
             store_reg(s, rd, tmp);
         } else {
             /* store */
+            instr_count_inc(ARM_THUMB_INSTRUCTION_STR1);
             tmp = load_reg(s, rd);
             gen_st32(tmp, addr, IS_USER(s));
         }
@@ -8333,10 +8992,12 @@ static void disas_thumb_insn(CPUState *e
 
         if (insn & (1 << 11)) {
             /* load */
+            instr_count_inc(ARM_THUMB_INSTRUCTION_LDRB1);
             tmp = gen_ld8u(addr, IS_USER(s));
             store_reg(s, rd, tmp);
         } else {
             /* store */
+            instr_count_inc(ARM_THUMB_INSTRUCTION_STRB1);
             tmp = load_reg(s, rd);
             gen_st8(tmp, addr, IS_USER(s));
         }
@@ -8353,10 +9014,12 @@ static void disas_thumb_insn(CPUState *e
 
         if (insn & (1 << 11)) {
             /* load */
+            instr_count_inc(ARM_THUMB_INSTRUCTION_LDRH1);
             tmp = gen_ld16u(addr, IS_USER(s));
             store_reg(s, rd, tmp);
         } else {
             /* store */
+            instr_count_inc(ARM_THUMB_INSTRUCTION_STRH1);
             tmp = load_reg(s, rd);
             gen_st16(tmp, addr, IS_USER(s));
         }
@@ -8372,10 +9035,12 @@ static void disas_thumb_insn(CPUState *e
 
         if (insn & (1 << 11)) {
             /* load */
+            instr_count_inc(ARM_THUMB_INSTRUCTION_LDR4);
             tmp = gen_ld32(addr, IS_USER(s));
             store_reg(s, rd, tmp);
         } else {
             /* store */
+            instr_count_inc(ARM_THUMB_INSTRUCTION_STR3);
             tmp = load_reg(s, rd);
             gen_st32(tmp, addr, IS_USER(s));
         }
@@ -8387,9 +9052,11 @@ static void disas_thumb_insn(CPUState *e
         rd = (insn >> 8) & 7;
         if (insn & (1 << 11)) {
             /* SP */
+            instr_count_inc(ARM_THUMB_INSTRUCTION_ADD6);
             tmp = load_reg(s, 13);
         } else {
             /* PC. bit 1 is ignored.  */
+            instr_count_inc(ARM_THUMB_INSTRUCTION_ADD5);
             tmp = new_tmp();
             tcg_gen_movi_i32(tmp, (s->pc + 2) & ~(uint32_t)2);
         }
@@ -8406,8 +9073,11 @@ static void disas_thumb_insn(CPUState *e
             /* adjust stack pointer */
             tmp = load_reg(s, 13);
             val = (insn & 0x7f) * 4;
-            if (insn & (1 << 7))
+            if (insn & (1 << 7)) { /* bit 7 set: SUB SP, #imm */
+                instr_count_inc(ARM_THUMB_INSTRUCTION_SUB4);
                 val = -(int32_t)val;
+            }
+            else instr_count_inc(ARM_THUMB_INSTRUCTION_ADD7);
             tcg_gen_addi_i32(tmp, tmp, val);
             store_reg(s, 13, tmp);
             break;
@@ -8418,10 +9088,22 @@ static void disas_thumb_insn(CPUState *e
             rm = (insn >> 3) & 7;
             tmp = load_reg(s, rm);
             switch ((insn >> 6) & 3) {
-            case 0: gen_sxth(tmp); break;
-            case 1: gen_sxtb(tmp); break;
-            case 2: gen_uxth(tmp); break;
-            case 3: gen_uxtb(tmp); break;
+            case 0:
+                instr_count_inc(ARM_THUMB_INSTRUCTION_SXTH);
+                gen_sxth(tmp);
+                break;
+            case 1:
+                instr_count_inc(ARM_THUMB_INSTRUCTION_SXTB);
+                gen_sxtb(tmp);
+                break;
+            case 2:
+                instr_count_inc(ARM_THUMB_INSTRUCTION_UXTH);
+                gen_uxth(tmp);
+                break;
+            case 3:
+                instr_count_inc(ARM_THUMB_INSTRUCTION_UXTB);
+                gen_uxtb(tmp);
+                break;
             }
             store_reg(s, rd, tmp);
             break;
@@ -8443,10 +9125,12 @@ static void disas_thumb_insn(CPUState *e
                 if (insn & (1 << i)) {
                     if (insn & (1 << 11)) {
                         /* pop */
+                        instr_count_inc(ARM_THUMB_INSTRUCTION_POP);
                         tmp = gen_ld32(addr, IS_USER(s));
                         store_reg(s, i, tmp);
                     } else {
                         /* push */
+                        instr_count_inc(ARM_THUMB_INSTRUCTION_PUSH);
                         tmp = load_reg(s, i);
                         gen_st32(tmp, addr, IS_USER(s));
                     }
@@ -8458,11 +9142,13 @@ static void disas_thumb_insn(CPUState *e
             if (insn & (1 << 8)) {
                 if (insn & (1 << 11)) {
                     /* pop pc */
+                    instr_count_inc(ARM_THUMB_INSTRUCTION_POP);
                     tmp = gen_ld32(addr, IS_USER(s));
                     /* don't set the pc until the rest of the instruction
                        has completed */
                 } else {
                     /* push lr */
+                    instr_count_inc(ARM_THUMB_INSTRUCTION_PUSH);
                     tmp = load_reg(s, 14);
                     gen_st32(tmp, addr, IS_USER(s));
                 }
@@ -8506,6 +9192,7 @@ static void disas_thumb_insn(CPUState *e
             break;
 
         case 0xe: /* bkpt */
+            instr_count_inc(ARM_THUMB_INSTRUCTION_BKPT);
             gen_set_condexec(s);
             gen_set_pc_im(s->pc - 2);
             gen_exception(EXCP_BKPT);
@@ -8518,15 +9205,25 @@ static void disas_thumb_insn(CPUState *e
             rd = insn & 0x7;
             tmp = load_reg(s, rn);
             switch ((insn >> 6) & 3) {
-            case 0: tcg_gen_bswap_i32(tmp, tmp); break;
-            case 1: gen_rev16(tmp); break;
-            case 3: gen_revsh(tmp); break;
+            case 0:
+                instr_count_inc(ARM_THUMB_INSTRUCTION_REV);
+                tcg_gen_bswap_i32(tmp, tmp);
+                break;
+            case 1:
+                instr_count_inc(ARM_THUMB_INSTRUCTION_REV16);
+                gen_rev16(tmp);
+                break;
+            case 3:
+                instr_count_inc(ARM_THUMB_INSTRUCTION_REVSH);
+                gen_revsh(tmp);
+                break;
             default: goto illegal_op;
             }
             store_reg(s, rd, tmp);
             break;
 
         case 6: /* cps */
+            instr_count_inc(ARM_THUMB_INSTRUCTION_CPS);
             ARCH(6);
             if (IS_USER(s))
                 break;
@@ -8568,10 +9265,12 @@ static void disas_thumb_insn(CPUState *e
             if (insn & (1 << i)) {
                 if (insn & (1 << 11)) {
                     /* load */
+                    instr_count_inc(ARM_THUMB_INSTRUCTION_LDMIA);
                     tmp = gen_ld32(addr, IS_USER(s));
                     store_reg(s, i, tmp);
                 } else {
                     /* store */
+                    instr_count_inc(ARM_THUMB_INSTRUCTION_STMIA);
                     tmp = load_reg(s, i);
                     gen_st32(tmp, addr, IS_USER(s));
                 }
@@ -8595,6 +9294,7 @@ static void disas_thumb_insn(CPUState *e
 
         if (cond == 0xf) {
             /* swi */
+            instr_count_inc(ARM_THUMB_INSTRUCTION_SWI);
             gen_set_condexec(s);
             gen_set_pc_im(s->pc);
             s->is_jmp = DISAS_SWI;
@@ -8607,6 +9307,7 @@ static void disas_thumb_insn(CPUState *e
         gen_movl_T1_reg(s, 15);
 
         /* jump to the offset */
+        instr_count_inc(ARM_THUMB_INSTRUCTION_B1);
         val = (uint32_t)s->pc + 2;
         offset = ((int32_t)insn << 24) >> 24;
         val += offset << 1;
@@ -8615,11 +9316,14 @@ static void disas_thumb_insn(CPUState *e
 
     case 14:
         if (insn & (1 << 11)) {
+            printf("BLX1\n");
+            instr_count_inc(ARM_THUMB_INSTRUCTION_BLX1);
             if (disas_thumb2_insn(env, s, insn))
               goto undef32;
             break;
         }
         /* unconditional branch */
+        instr_count_inc(ARM_THUMB_INSTRUCTION_B2);
         val = (uint32_t)s->pc;
         offset = ((int32_t)insn << 21) >> 21;
         val += (offset << 1) + 2;
@@ -8627,6 +9331,11 @@ static void disas_thumb_insn(CPUState *e
         break;
 
     case 15:
+        if (insn & (1 << 11)) {
+            instr_count_inc(ARM_THUMB_INSTRUCTION_BL);
+        } else {
+            instr_count_inc(ARM_THUMB_INSTRUCTION_BL_BLX_HIGH_PART);
+        }
         if (disas_thumb2_insn(env, s, insn))
             goto undef32;
         break;

      parent reply	other threads:[~2009-06-15 13:32 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-06-12 10:56 [Qemu-devel] [PATCH] Instruction counting instrumentation for ARM, 2nd version Sami Kiminki
2009-06-12 15:41 ` Jamie Lokier
2009-06-15  8:12   ` Sami Kiminki
2009-06-15 13:26 ` Laurent Desnogues
2009-06-29  8:42   ` [Qemu-devel] [PATCH] Instruction counting instrumentation for ARM v3 Sami Kiminki
     [not found] ` <761ea48b0906150531j6cf88fa2v5652322efcc7469c@mail.gmail.com>
2009-06-15 13:31   ` Sami Kiminki [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1245072719.28612.34.camel@dis \
    --to=sami.kiminki@tkk.fi \
    --cc=laurent.desnogues@gmail.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).