qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: "Timo Töyry" <ttoyry@cs.hut.fi>
To: qemu-devel@nongnu.org
Subject: [Qemu-devel] Instruction counting instrumentation for ARM + initial patch
Date: Tue, 19 May 2009 17:59:57 +0300	[thread overview]
Message-ID: <1242745197.24234.7.camel@peak10.cs.hut.fi> (raw)

[-- Attachment #1: Type: text/plain, Size: 1762 bytes --]

Hi,

The attached patch implements instruction counting instrumentation for
ARM usermode linux. Currently the patch is a working draft but we
intend to clean it up for possible inclusion. We'd appreciate any
comments. Below is a more detailed description.

Motivation:
For some applications it is important to know which complex
instructions the application uses. Data on the instructions used
can also help to optimize the platform CPU in embedded systems,
e.g., by choosing the most appropriate CPU for a specific task.

Implementation:
The instruction counters are implemented as extra guest CPU registers,
see target-arm/cpu.h:CPUARMState. The decoder is modified to generate
TCG code that increments the counters. The values of the counters are
printed at guest exit (syscall exit_group et al.). The implementation
is for arm-linux-user, but we
suppose this would be easily ported to other guest architectures, too.

Patch status:
The attached draft patch applies to 0.10.4. Currently it supports
ARMv6, VFP and Thumb instructions. We'll probably add support for
NEON (if we get permission from ARM).

Todo:
- configure-support to enable/disable this feature at compile time
- extract instrumentation code from decoders, to better support re-use
  in other architectures
- selectable target (console/log-file) for outputting the values of
  the counters

Usage:
The instruction counting is enabled with the command-line parameter
-instrcount. When the guest application exits, the counter values are
printed to stderr. Example: qemu-arm -instrcount <some-arm-linux-binary>

This work is sponsored by Nokia / Maemo development team.

Regards,
Timo

-- 
Timo Töyry
Embedded Software Group / Helsinki University of Technology



[-- Attachment #2: qemu-0.10.4-instrumentation-draft.patch --]
[-- Type: text/x-patch, Size: 93949 bytes --]

Index: Makefile.target
===================================================================
--- Makefile.target	(.../tags/qemu-0_10_4)	(revision 33)
+++ Makefile.target	(.../trunk/qemu)	(revision 33)
@@ -208,8 +208,10 @@
 LIBOBJS+=s390-dis.o
 endif
 
+# instrumentation support
+LIBOBJS+=instrumentation.o
+
 # libqemu
-
 libqemu.a: $(LIBOBJS)
 
 translate.o: translate.c cpu.h
Index: linux-user/syscall.c
===================================================================
--- linux-user/syscall.c	(.../tags/qemu-0_10_4)	(revision 33)
+++ linux-user/syscall.c	(.../trunk/qemu)	(revision 33)
@@ -79,6 +79,8 @@
 #include "qemu.h"
 #include "qemu-common.h"
 
+#include "instrumentation.h"
+
 #if defined(USE_NPTL)
 #include <linux/futex.h>
 #define CLONE_NPTL_FLAGS2 (CLONE_SETTLS | \
@@ -300,6 +302,48 @@
 extern int setfsgid(int);
 extern int setgroups(int, gid_t *);
 
+#ifdef TARGET_ARM
+/* Print one counter table to stderr, skipping entries that are zero.
+   The last two slots of every table (not_instrumented and the total) are
+   listed but left out of the "Counted instructions" sum. */
+static void print_counter_table(const char *title, const char *const *names,
+                                const uint32_t *counts, int n)
+{
+    uint32_t counted = 0;
+    int i;
+    fprintf(stderr, "%s instructions:\n", title);
+    for (i = 0; i < n; i++) {
+        if (counts[i] == 0) {
+            continue;
+        }
+        fprintf(stderr, "%s: %u\n", names[i], (unsigned int)counts[i]);
+        if (i < n - 2) {
+            counted += counts[i];
+        }
+    }
+    fprintf(stderr, "Counted instructions: %u\n", (unsigned int)counted);
+}
+#endif
+
+/* Dump all instruction counters of env to stderr at guest exit.  Does
+   nothing unless counting was enabled (-instrcount), on every target. */
+static inline void print_instruction_counters(CPUState *env)
+{
+    if (!instrumentation_count_instructions) {
+        return;
+    }
+#ifdef TARGET_ARM
+    print_counter_table("Arm", arm_instr_names,
+                        env->arm_instr_count, ARM_INSTRUCTIONS);
+    print_counter_table("VFP", arm_vfp_instr_names,
+                        env->arm_vfp_instr_count, ARM_VFP_INSTRUCTIONS);
+    print_counter_table("Thumb", arm_thumb_instr_names,
+                        env->arm_thumb_instr_count, ARM_THUMB_INSTRUCTIONS);
+#else
+    fprintf(stderr, "Instruction counting not supported in this platform.\n");
+#endif
+}
+
 #define ERRNO_TABLE_SIZE 1200
 
 /* target_to_host_errno_table[] is initialized from
@@ -487,7 +531,7 @@
 
     if (!is_error(mapped_addr))
 	target_brk = new_brk;
-    
+
     return target_brk;
 }
 
@@ -729,9 +773,9 @@
     abi_ulong target_cmsg_addr;
     struct target_cmsghdr *target_cmsg;
     socklen_t space = 0;
-    
+
     msg_controllen = tswapl(target_msgh->msg_controllen);
-    if (msg_controllen < sizeof (struct target_cmsghdr)) 
+    if (msg_controllen < sizeof (struct target_cmsghdr))
         goto the_end;
     target_cmsg_addr = tswapl(target_msgh->msg_control);
     target_cmsg = lock_user(VERIFY_READ, target_cmsg_addr, msg_controllen, 1);
@@ -788,7 +832,7 @@
     socklen_t space = 0;
 
     msg_controllen = tswapl(target_msgh->msg_controllen);
-    if (msg_controllen < sizeof (struct target_cmsghdr)) 
+    if (msg_controllen < sizeof (struct target_cmsghdr))
         goto the_end;
     target_cmsg_addr = tswapl(target_msgh->msg_control);
     target_cmsg = lock_user(VERIFY_WRITE, target_cmsg_addr, msg_controllen, 0);
@@ -2147,7 +2191,7 @@
             }
             raddr = h2g((unsigned long)host_addr);
             /* find out the length of the shared memory segment */
-            
+
             ret = get_errno(shmctl(first, IPC_STAT, &shm_info));
             if (is_error(ret)) {
                 /* can't get length, bail out */
@@ -2703,7 +2747,7 @@
     }
     unlock_user_struct(target_ldt_info, ptr, 1);
 
-    if (ldt_info.entry_number < TARGET_GDT_ENTRY_TLS_MIN || 
+    if (ldt_info.entry_number < TARGET_GDT_ENTRY_TLS_MIN ||
         ldt_info.entry_number > TARGET_GDT_ENTRY_TLS_MAX)
            return -TARGET_EINVAL;
     seg_32bit = ldt_info.flags & 1;
@@ -2781,7 +2825,7 @@
     lp = (uint32_t *)(gdt_table + idx);
     entry_1 = tswap32(lp[0]);
     entry_2 = tswap32(lp[1]);
-    
+
     read_exec_only = ((entry_2 >> 9) & 1) ^ 1;
     contents = (entry_2 >> 10) & 3;
     seg_not_present = ((entry_2 >> 15) & 1) ^ 1;
@@ -2797,8 +2841,8 @@
         (read_exec_only << 3) | (limit_in_pages << 4) |
         (seg_not_present << 5) | (useable << 6) | (lm << 7);
     limit = (entry_1 & 0xffff) | (entry_2  & 0xf0000);
-    base_addr = (entry_1 >> 16) | 
-        (entry_2 & 0xff000000) | 
+    base_addr = (entry_1 >> 16) |
+        (entry_2 & 0xff000000) |
         ((entry_2 & 0xff) << 16);
     target_ldt_info->base_addr = tswapl(base_addr);
     target_ldt_info->limit = tswap32(limit);
@@ -2814,7 +2858,7 @@
     abi_long ret;
     abi_ulong val;
     int idx;
-    
+
     switch(code) {
     case TARGET_ARCH_SET_GS:
     case TARGET_ARCH_SET_FS:
@@ -3431,6 +3475,7 @@
 #ifdef HAVE_GPROF
         _mcleanup();
 #endif
+        print_instruction_counters(cpu_env);
         gdb_exit(cpu_env, arg1);
         /* XXX: should free thread stack and CPU env */
         sys_exit(arg1);
@@ -4923,6 +4968,7 @@
 #ifdef HAVE_GPROF
         _mcleanup();
 #endif
+        print_instruction_counters(cpu_env);
         gdb_exit(cpu_env, arg1);
         ret = get_errno(exit_group(arg1));
         break;
@@ -5501,7 +5547,7 @@
         break;
 #if defined(TARGET_NR_fchownat) && defined(__NR_fchownat)
     case TARGET_NR_fchownat:
-        if (!(p = lock_user_string(arg2))) 
+        if (!(p = lock_user_string(arg2)))
             goto efault;
         ret = get_errno(sys_fchownat(arg1, p, low2highuid(arg3), low2highgid(arg4), arg5));
         unlock_user(p, arg2, 0);
@@ -5826,7 +5872,7 @@
         case TARGET_F_GETLK64:
 #ifdef TARGET_ARM
             if (((CPUARMState *)cpu_env)->eabi) {
-                if (!lock_user_struct(VERIFY_READ, target_efl, arg3, 1)) 
+                if (!lock_user_struct(VERIFY_READ, target_efl, arg3, 1))
                     goto efault;
                 fl.l_type = tswap16(target_efl->l_type);
                 fl.l_whence = tswap16(target_efl->l_whence);
@@ -5837,7 +5883,7 @@
             } else
 #endif
             {
-                if (!lock_user_struct(VERIFY_READ, target_fl, arg3, 1)) 
+                if (!lock_user_struct(VERIFY_READ, target_fl, arg3, 1))
                     goto efault;
                 fl.l_type = tswap16(target_fl->l_type);
                 fl.l_whence = tswap16(target_fl->l_whence);
@@ -5850,7 +5896,7 @@
 	    if (ret == 0) {
 #ifdef TARGET_ARM
                 if (((CPUARMState *)cpu_env)->eabi) {
-                    if (!lock_user_struct(VERIFY_WRITE, target_efl, arg3, 0)) 
+                    if (!lock_user_struct(VERIFY_WRITE, target_efl, arg3, 0))
                         goto efault;
                     target_efl->l_type = tswap16(fl.l_type);
                     target_efl->l_whence = tswap16(fl.l_whence);
@@ -5861,7 +5907,7 @@
                 } else
 #endif
                 {
-                    if (!lock_user_struct(VERIFY_WRITE, target_fl, arg3, 0)) 
+                    if (!lock_user_struct(VERIFY_WRITE, target_fl, arg3, 0))
                         goto efault;
                     target_fl->l_type = tswap16(fl.l_type);
                     target_fl->l_whence = tswap16(fl.l_whence);
@@ -5877,7 +5923,7 @@
         case TARGET_F_SETLKW64:
 #ifdef TARGET_ARM
             if (((CPUARMState *)cpu_env)->eabi) {
-                if (!lock_user_struct(VERIFY_READ, target_efl, arg3, 1)) 
+                if (!lock_user_struct(VERIFY_READ, target_efl, arg3, 1))
                     goto efault;
                 fl.l_type = tswap16(target_efl->l_type);
                 fl.l_whence = tswap16(target_efl->l_whence);
@@ -5888,7 +5934,7 @@
             } else
 #endif
             {
-                if (!lock_user_struct(VERIFY_READ, target_fl, arg3, 1)) 
+                if (!lock_user_struct(VERIFY_READ, target_fl, arg3, 1))
                     goto efault;
                 fl.l_type = tswap16(target_fl->l_type);
                 fl.l_whence = tswap16(target_fl->l_whence);
Index: linux-user/main.c
===================================================================
--- linux-user/main.c	(.../tags/qemu-0_10_4)	(revision 33)
+++ linux-user/main.c	(.../trunk/qemu)	(revision 33)
@@ -35,6 +35,8 @@
 
 #include "envlist.h"
 
+#include "instrumentation.h"
+
 #define DEBUG_LOGFILE "/tmp/qemu.log"
 
 char *exec_path;
@@ -1945,7 +1947,7 @@
 {
     int trapnr, ret;
     target_siginfo_t info;
-    
+
     while (1) {
         trapnr = cpu_cris_exec (env);
         switch (trapnr) {
@@ -1963,13 +1965,13 @@
 	  /* just indicate that signals should be handled asap */
 	  break;
         case EXCP_BREAK:
-            ret = do_syscall(env, 
-                             env->regs[9], 
-                             env->regs[10], 
-                             env->regs[11], 
-                             env->regs[12], 
-                             env->regs[13], 
-                             env->pregs[7], 
+            ret = do_syscall(env,
+                             env->regs[9],
+                             env->regs[10],
+                             env->regs[11],
+                             env->regs[12],
+                             env->regs[13],
+                             env->pregs[7],
                              env->pregs[11]);
             env->regs[10] = ret;
             break;
@@ -2199,6 +2201,8 @@
            "-p pagesize  set the host page size to 'pagesize'\n"
            "-strace      log system calls\n"
            "\n"
+           "Other:\n"
+           "-instrcount	Count instructions\n"
            "Environment variables:\n"
            "QEMU_STRACE       Print system calls and arguments similar to the\n"
            "                  'strace' program.  Enable by setting to any value.\n"
@@ -2222,7 +2226,7 @@
 void init_task_state(TaskState *ts)
 {
     int i;
- 
+
     ts->used = 1;
     ts->first_free = ts->sigqueue_table;
     for (i = 0; i < MAX_SIGQUEUE_SIZE - 1; i++) {
@@ -2230,7 +2234,7 @@
     }
     ts->sigqueue_table[i].next = NULL;
 }
- 
+
 int main(int argc, char **argv, char **envp)
 {
     const char *filename;
@@ -2341,8 +2345,9 @@
             (void) envlist_unsetenv(envlist, "LD_PRELOAD");
         } else if (!strcmp(r, "strace")) {
             do_strace = 1;
-        } else
-        {
+        } else if (!strcmp(r, "instrcount")) {
+        	instrumentation_count_instructions = 1;
+        } else {
             usage();
         }
     }
@@ -2667,7 +2672,7 @@
 	    env->regs[12] = regs->r12;
 	    env->regs[13] = regs->r13;
 	    env->regs[14] = info->start_stack;
-	    env->regs[15] = regs->acr;	    
+	    env->regs[15] = regs->acr;
 	    env->pc = regs->erp;
     }
 #else

Index: Makefile
===================================================================
--- Makefile	(.../tags/qemu-0_10_4)	(revision 33)
+++ Makefile	(.../trunk/qemu)	(revision 33)
@@ -179,6 +179,10 @@
 # USER_OBJS is code used by qemu userspace emulation
 USER_OBJS=cutils.o  cache-utils.o
 
+# instruction count instrumentation
+USER_OBJS+=instrumentation.o
+
+
 libqemu_user.a: $(USER_OBJS)
 
 ######################################################################
Index: instrumentation.c
===================================================================
--- instrumentation.c	(.../tags/qemu-0_10_4)	(revision 0)
+++ instrumentation.c	(.../trunk/qemu)	(revision 33)
@@ -0,0 +1,9 @@
+/*
+ * instrumentation.c
+ *
+ *  Created on: May 14, 2009
+ *      Author: ttoyry
+ */
+
+#include "instrumentation.h"
+unsigned int instrumentation_count_instructions = 0;
Index: instrumentation.h
===================================================================
--- instrumentation.h	(.../tags/qemu-0_10_4)	(revision 0)
+++ instrumentation.h	(.../trunk/qemu)	(revision 33)
@@ -0,0 +1,16 @@
+/*
+ * instrumentation.h
+ *
+ * Author: Timo Toyry
+ */
+
+#ifndef INSTRUMENTATION_H
+#define INSTRUMENTATION_H
+
+/*
+ * 0 to disable (default)
+ * nonzero to enable
+ */
+extern unsigned int instrumentation_count_instructions;
+
+#endif /* INSTRUMENTATION_H */
Index: target-arm/cpu.h
===================================================================
--- target-arm/cpu.h	(.../tags/qemu-0_10_4)	(revision 33)
+++ target-arm/cpu.h	(.../trunk/qemu)	(revision 33)
@@ -51,6 +51,332 @@
 #define ARMV7M_EXCP_PENDSV  14
 #define ARMV7M_EXCP_SYSTICK 15
 
+/* Do not change the order of the instructions in the blocks marked with - | -. */
+enum arm_instructions {
+	ARM_INSTRUCTION_B,
+	ARM_INSTRUCTION_BL,
+	ARM_INSTRUCTION_BLX,
+	ARM_INSTRUCTION_BX,
+	ARM_INSTRUCTION_BXJ,
+	ARM_INSTRUCTION_ADC,
+	ARM_INSTRUCTION_ADD,
+	ARM_INSTRUCTION_AND,
+	ARM_INSTRUCTION_BIC,
+	ARM_INSTRUCTION_CMN,
+	ARM_INSTRUCTION_CMP,
+	ARM_INSTRUCTION_EOR,
+	ARM_INSTRUCTION_MOV,
+	ARM_INSTRUCTION_MVN,
+	ARM_INSTRUCTION_ORR,
+	ARM_INSTRUCTION_RSB,
+	ARM_INSTRUCTION_RSC,
+	ARM_INSTRUCTION_SBC,
+	ARM_INSTRUCTION_SUB,
+	ARM_INSTRUCTION_TEQ,
+	ARM_INSTRUCTION_TST,
+	ARM_INSTRUCTION_MUL,  /* - */
+	ARM_INSTRUCTION_MULS, /* - */
+	ARM_INSTRUCTION_MLA,  /* - */
+	ARM_INSTRUCTION_MLAS, /* - */
+	ARM_INSTRUCTION_SMLAXY,
+	ARM_INSTRUCTION_SMLAL,  /* - */
+	ARM_INSTRUCTION_SMLALS, /* - */
+	ARM_INSTRUCTION_SMLALXY,
+	ARM_INSTRUCTION_SMLAWY,
+	ARM_INSTRUCTION_SMUAD, /* - */
+	ARM_INSTRUCTION_SMUSD, /* | */
+	ARM_INSTRUCTION_SMLAD, /* | */
+	ARM_INSTRUCTION_SMLSD, /* - */
+	ARM_INSTRUCTION_SMLALD, /* - */
+	ARM_INSTRUCTION_SMLSLD, /* - */
+	ARM_INSTRUCTION_SMMLA,
+	ARM_INSTRUCTION_SMMLS,
+	ARM_INSTRUCTION_SMMUL,
+	ARM_INSTRUCTION_SMULXY,
+	ARM_INSTRUCTION_SMULL,  /* - */
+	ARM_INSTRUCTION_SMULLS, /* - */
+	ARM_INSTRUCTION_SMULWY,
+	ARM_INSTRUCTION_UMAAL,
+	ARM_INSTRUCTION_UMLAL,  /* - */
+	ARM_INSTRUCTION_UMLALS, /* - */
+	ARM_INSTRUCTION_UMULL,  /* - */
+	ARM_INSTRUCTION_UMULLS, /* - */
+	ARM_INSTRUCTION_QADD,
+	ARM_INSTRUCTION_QDADD,
+	ARM_INSTRUCTION_QADD16,   /* - */
+	ARM_INSTRUCTION_QADDSUBX, /* | */
+	ARM_INSTRUCTION_QSUBADDX, /* | */
+	ARM_INSTRUCTION_QSUB16,   /* | */
+	ARM_INSTRUCTION_QADD8,    /* | */
+	ARM_INSTRUCTION_QSUB8,    /* - */
+	ARM_INSTRUCTION_QSUB,
+	ARM_INSTRUCTION_QDSUB,
+	ARM_INSTRUCTION_SADD16,   /* - */
+	ARM_INSTRUCTION_SADDSUBX, /* | */
+	ARM_INSTRUCTION_SSUBADDX, /* | */
+	ARM_INSTRUCTION_SSUB16,   /* | */
+	ARM_INSTRUCTION_SADD8,    /* | */
+	ARM_INSTRUCTION_SSUB8,    /* - */
+	ARM_INSTRUCTION_SHADD16,   /* - */
+	ARM_INSTRUCTION_SHADDSUBX, /* | */
+	ARM_INSTRUCTION_SHSUBADDX, /* | */
+	ARM_INSTRUCTION_SHSUB16,   /* | */
+	ARM_INSTRUCTION_SHADD8,    /* | */
+	ARM_INSTRUCTION_SHSUB8,    /* - */
+	ARM_INSTRUCTION_UADD16,   /* - */
+	ARM_INSTRUCTION_UADDSUBX, /* | */
+	ARM_INSTRUCTION_USUBADDX, /* | */
+	ARM_INSTRUCTION_USUB16,   /* | */
+	ARM_INSTRUCTION_UADD8,    /* | */
+	ARM_INSTRUCTION_USUB8,    /* - */
+	ARM_INSTRUCTION_UHADD16,   /* - */
+	ARM_INSTRUCTION_UHADDSUBX, /* | */
+	ARM_INSTRUCTION_UHSUBADDX, /* | */
+	ARM_INSTRUCTION_UHSUB16,   /* | */
+	ARM_INSTRUCTION_UHADD8,    /* | */
+	ARM_INSTRUCTION_UHSUB8,    /* - */
+	ARM_INSTRUCTION_UQADD16,   /* - */
+	ARM_INSTRUCTION_UQADDSUBX, /* | */
+	ARM_INSTRUCTION_UQSUBADDX, /* | */
+	ARM_INSTRUCTION_UQSUB16,   /* | */
+	ARM_INSTRUCTION_UQADD8,    /* | */
+	ARM_INSTRUCTION_UQSUB8,    /* - */
+	ARM_INSTRUCTION_SXTAB16, /* - */
+	ARM_INSTRUCTION_SXTAB,   /* | */
+	ARM_INSTRUCTION_SXTAH,   /* | */
+	ARM_INSTRUCTION_SXTB16,  /* | */
+	ARM_INSTRUCTION_SXTB,    /* | */
+	ARM_INSTRUCTION_SXTH,    /* - */
+	ARM_INSTRUCTION_UXTAB16, /* - */
+	ARM_INSTRUCTION_UXTAB,   /* | */
+	ARM_INSTRUCTION_UXTAH,   /* | */
+	ARM_INSTRUCTION_UXTB16,  /* | */
+	ARM_INSTRUCTION_UXTB,    /* | */
+	ARM_INSTRUCTION_UXTH,    /* - */
+	ARM_INSTRUCTION_CLZ,
+	ARM_INSTRUCTION_USAD8,
+	ARM_INSTRUCTION_USADA8,
+	ARM_INSTRUCTION_PKH,
+	ARM_INSTRUCTION_PKHBT,
+	ARM_INSTRUCTION_PKHTB,
+	ARM_INSTRUCTION_REV,
+	ARM_INSTRUCTION_REV16,
+	ARM_INSTRUCTION_REVSH,
+	ARM_INSTRUCTION_SEL,
+	ARM_INSTRUCTION_SSAT,
+	ARM_INSTRUCTION_SSAT16,
+	ARM_INSTRUCTION_USAT,
+	ARM_INSTRUCTION_USAT16,
+	ARM_INSTRUCTION_MRS,
+	ARM_INSTRUCTION_MSR,
+	ARM_INSTRUCTION_CPS,
+	ARM_INSTRUCTION_SETEND,
+	ARM_INSTRUCTION_LDR,
+	ARM_INSTRUCTION_LDRB,
+	ARM_INSTRUCTION_LDRBT,
+	ARM_INSTRUCTION_LDRD,
+	ARM_INSTRUCTION_LDREX,
+	ARM_INSTRUCTION_LDRH,
+	ARM_INSTRUCTION_LDRSB,
+	ARM_INSTRUCTION_LDRSH,
+	ARM_INSTRUCTION_LDRT,
+	ARM_INSTRUCTION_STR,
+	ARM_INSTRUCTION_STRB,
+	ARM_INSTRUCTION_STRBT,
+	ARM_INSTRUCTION_STRD,
+	ARM_INSTRUCTION_STREX,
+	ARM_INSTRUCTION_STRH,
+	ARM_INSTRUCTION_STRT,
+	ARM_INSTRUCTION_LDM1, //See Arm manual ARM DDI 0100I page A3-27
+	ARM_INSTRUCTION_LDM2,
+	ARM_INSTRUCTION_LDM3,
+	ARM_INSTRUCTION_STM1,
+	ARM_INSTRUCTION_STM2,
+	ARM_INSTRUCTION_SWP,
+	ARM_INSTRUCTION_SWPB,
+	ARM_INSTRUCTION_BKPT,
+	ARM_INSTRUCTION_SWI,
+	ARM_INSTRUCTION_CDP,
+	ARM_INSTRUCTION_LDC,
+	ARM_INSTRUCTION_MCR,
+	ARM_INSTRUCTION_MCRR,
+	ARM_INSTRUCTION_MRC,
+	ARM_INSTRUCTION_MRRC,
+	ARM_INSTRUCTION_STC,
+	ARM_INSTRUCTION_PLD,
+	ARM_INSTRUCTION_RFE,
+	ARM_INSTRUCTION_SRS,
+	ARM_INSTRUCTION_MCRR2,
+	ARM_INSTRUCTION_MRRC2,
+	ARM_INSTRUCTION_STC2,
+	ARM_INSTRUCTION_LDC2,
+	ARM_INSTRUCTION_CDP2,
+	ARM_INSTRUCTION_MCR2,
+	ARM_INSTRUCTION_MRC2,
+	ARM_INSTRUCTION_COPROCESSOR,
+	ARM_INSTRUCTION_UNKNOWN,
+	ARM_INSTRUCTION_NOT_INSTRUMENTED,
+	ARM_INSTRUCTION_TOTAL_COUNT,
+	ARM_INSTRUCTIONS
+};
+
+/* Do not change the order of the instructions in the blocks marked with - | -. */
+enum arm_vfp_instructions {
+	ARM_VFP_INSTRUCTION_FABSD, /* - */
+	ARM_VFP_INSTRUCTION_FABSS, /* - */
+	ARM_VFP_INSTRUCTION_FADDD, /* - */
+	ARM_VFP_INSTRUCTION_FADDS, /* - */
+	ARM_VFP_INSTRUCTION_FCMPD, /* - */
+	ARM_VFP_INSTRUCTION_FCMPS, /* - */
+	ARM_VFP_INSTRUCTION_FCMPED,  /* - */
+	ARM_VFP_INSTRUCTION_FCMPES,  /* - */
+	ARM_VFP_INSTRUCTION_FCMPEZD, /* - */
+	ARM_VFP_INSTRUCTION_FCMPEZS, /* - */
+	ARM_VFP_INSTRUCTION_FCMPZD, /* - */
+	ARM_VFP_INSTRUCTION_FCMPZS, /* - */
+	ARM_VFP_INSTRUCTION_FCPYD, /* - */
+	ARM_VFP_INSTRUCTION_FCPYS, /* - */
+	ARM_VFP_INSTRUCTION_FCVTDS, /* - */
+	ARM_VFP_INSTRUCTION_FCVTSD, /* - */
+	ARM_VFP_INSTRUCTION_FDIVD, /* - */
+	ARM_VFP_INSTRUCTION_FDIVS, /* - */
+	ARM_VFP_INSTRUCTION_FLDD, /* - */
+	ARM_VFP_INSTRUCTION_FLDS, /* - */
+	ARM_VFP_INSTRUCTION_FLDMD, /* - */
+	ARM_VFP_INSTRUCTION_FLDMS, /* - */
+	ARM_VFP_INSTRUCTION_FLDMX,
+	ARM_VFP_INSTRUCTION_FMACD,
+	ARM_VFP_INSTRUCTION_FMACS,
+	ARM_VFP_INSTRUCTION_FMDHR,
+	ARM_VFP_INSTRUCTION_FMDLR,
+	ARM_VFP_INSTRUCTION_FMDRR,
+	ARM_VFP_INSTRUCTION_FMRDH,
+	ARM_VFP_INSTRUCTION_FMRDL,
+	ARM_VFP_INSTRUCTION_FMRRD, /* - */
+	ARM_VFP_INSTRUCTION_FMRRS, /* - */
+	ARM_VFP_INSTRUCTION_FMRS,
+	ARM_VFP_INSTRUCTION_FMRX,
+	ARM_VFP_INSTRUCTION_FMSCD, /* - */
+	ARM_VFP_INSTRUCTION_FMSCS, /* - */
+	ARM_VFP_INSTRUCTION_FMSR,
+	ARM_VFP_INSTRUCTION_FMSRR,
+	ARM_VFP_INSTRUCTION_FMSTAT,
+	ARM_VFP_INSTRUCTION_FMULD, /* - */
+	ARM_VFP_INSTRUCTION_FMULS, /* - */
+	ARM_VFP_INSTRUCTION_FMXR,
+	ARM_VFP_INSTRUCTION_FNEGD, /* - */
+	ARM_VFP_INSTRUCTION_FNEGS, /* - */
+	ARM_VFP_INSTRUCTION_FNMACD, /* - */
+	ARM_VFP_INSTRUCTION_FNMACS, /* - */
+	ARM_VFP_INSTRUCTION_FNMSCD, /* - */
+	ARM_VFP_INSTRUCTION_FNMSCS, /* - */
+	ARM_VFP_INSTRUCTION_FNMULD, /* - */
+	ARM_VFP_INSTRUCTION_FNMULS, /* - */
+	ARM_VFP_INSTRUCTION_FSITOD, /* - */
+	ARM_VFP_INSTRUCTION_FSITOS, /* - */
+	ARM_VFP_INSTRUCTION_FSQRTD, /* - */
+	ARM_VFP_INSTRUCTION_FSQRTS, /* - */
+	ARM_VFP_INSTRUCTION_FSTD, /* - */
+	ARM_VFP_INSTRUCTION_FSTS, /* - */
+	ARM_VFP_INSTRUCTION_FSTMD,
+	ARM_VFP_INSTRUCTION_FSTMS,
+	ARM_VFP_INSTRUCTION_FSTMX,
+	ARM_VFP_INSTRUCTION_FSUBD, /* - */
+	ARM_VFP_INSTRUCTION_FSUBS, /* - */
+	ARM_VFP_INSTRUCTION_FTOSID, /* - */
+	ARM_VFP_INSTRUCTION_FTOSIS, /* - */
+	ARM_VFP_INSTRUCTION_FTOUID, /* - */
+	ARM_VFP_INSTRUCTION_FTOUIS, /* - */
+	ARM_VFP_INSTRUCTION_FUITOD, /* - */
+	ARM_VFP_INSTRUCTION_FUITOS, /* - */
+	ARM_VFP_INSTRUCTION_UNKNOWN,
+	ARM_VFP_INSTRUCTION_NOT_INSTRUMENTED,
+	ARM_VFP_INSTRUCTION_TOTAL_COUNT,
+	ARM_VFP_INSTRUCTIONS
+};
+
+/* Do not change the order of the instructions in the blocks marked with - | -. */
+enum arm_thumb_instructions {
+	ARM_THUMB_INSTRUCTION_ADC,
+	ARM_THUMB_INSTRUCTION_ADD1, /* - */
+	ARM_THUMB_INSTRUCTION_ADD2, /* | */
+	ARM_THUMB_INSTRUCTION_ADD3, /* - */
+	ARM_THUMB_INSTRUCTION_ADD4,
+	ARM_THUMB_INSTRUCTION_ADD5,
+	ARM_THUMB_INSTRUCTION_ADD6,
+	ARM_THUMB_INSTRUCTION_ADD7,
+	ARM_THUMB_INSTRUCTION_AND,
+	ARM_THUMB_INSTRUCTION_ASR1,
+	ARM_THUMB_INSTRUCTION_ASR2,
+	ARM_THUMB_INSTRUCTION_B1,
+	ARM_THUMB_INSTRUCTION_B2,
+	ARM_THUMB_INSTRUCTION_BIC,
+	ARM_THUMB_INSTRUCTION_BKPT,
+	ARM_THUMB_INSTRUCTION_BL,
+	ARM_THUMB_INSTRUCTION_BLX1,
+	ARM_THUMB_INSTRUCTION_BLX2,
+	ARM_THUMB_INSTRUCTION_BX,
+	ARM_THUMB_INSTRUCTION_CMN,
+	ARM_THUMB_INSTRUCTION_CMP1,
+	ARM_THUMB_INSTRUCTION_CMP2,
+	ARM_THUMB_INSTRUCTION_CMP3,
+	ARM_THUMB_INSTRUCTION_CPS,
+	ARM_THUMB_INSTRUCTION_CPY,
+	ARM_THUMB_INSTRUCTION_EOR,
+	ARM_THUMB_INSTRUCTION_LDMIA,
+	ARM_THUMB_INSTRUCTION_LDR1,
+	ARM_THUMB_INSTRUCTION_LDR2,
+	ARM_THUMB_INSTRUCTION_LDR3,
+	ARM_THUMB_INSTRUCTION_LDR4,
+	ARM_THUMB_INSTRUCTION_LDRB1,
+	ARM_THUMB_INSTRUCTION_LDRB2,
+	ARM_THUMB_INSTRUCTION_LDRH1,
+	ARM_THUMB_INSTRUCTION_LDRH2,
+	ARM_THUMB_INSTRUCTION_LDRSB,
+	ARM_THUMB_INSTRUCTION_LDRSH,
+	ARM_THUMB_INSTRUCTION_LSL1,
+	ARM_THUMB_INSTRUCTION_LSL2,
+	ARM_THUMB_INSTRUCTION_LSR1,
+	ARM_THUMB_INSTRUCTION_LSR2,
+	ARM_THUMB_INSTRUCTION_MOV1,
+	ARM_THUMB_INSTRUCTION_MOV2,
+	ARM_THUMB_INSTRUCTION_MOV3,
+	ARM_THUMB_INSTRUCTION_MUL,
+	ARM_THUMB_INSTRUCTION_MVN,
+	ARM_THUMB_INSTRUCTION_NEG,
+	ARM_THUMB_INSTRUCTION_ORR,
+	ARM_THUMB_INSTRUCTION_POP,
+	ARM_THUMB_INSTRUCTION_PUSH,
+	ARM_THUMB_INSTRUCTION_REV,
+	ARM_THUMB_INSTRUCTION_REV16,
+	ARM_THUMB_INSTRUCTION_REVSH,
+	ARM_THUMB_INSTRUCTION_ROR,
+	ARM_THUMB_INSTRUCTION_SBC,
+	ARM_THUMB_INSTRUCTION_SETEND,
+	ARM_THUMB_INSTRUCTION_STMIA,
+	ARM_THUMB_INSTRUCTION_STR1,
+	ARM_THUMB_INSTRUCTION_STR2,
+	ARM_THUMB_INSTRUCTION_STR3,
+	ARM_THUMB_INSTRUCTION_STRB1,
+	ARM_THUMB_INSTRUCTION_STRB2,
+	ARM_THUMB_INSTRUCTION_STRH1,
+	ARM_THUMB_INSTRUCTION_STRH2,
+	ARM_THUMB_INSTRUCTION_SUB1, /* - */
+	ARM_THUMB_INSTRUCTION_SUB2, /* | */
+	ARM_THUMB_INSTRUCTION_SUB3, /* - */
+	ARM_THUMB_INSTRUCTION_SUB4,
+	ARM_THUMB_INSTRUCTION_SWI,
+	ARM_THUMB_INSTRUCTION_SXTB,
+	ARM_THUMB_INSTRUCTION_SXTH,
+	ARM_THUMB_INSTRUCTION_TST,
+	ARM_THUMB_INSTRUCTION_UXTB,
+	ARM_THUMB_INSTRUCTION_UXTH,
+	ARM_THUMB_INSTRUCTION_UNKNOWN,
+	ARM_THUMB_INSTRUCTION_NOT_INSTRUMENTED,
+	ARM_THUMB_INSTRUCTION_TOTAL_COUNT,
+	ARM_THUMB_INSTRUCTIONS
+};
+
 typedef void ARMWriteCPFunc(void *opaque, int cp_info,
                             int srcreg, int operand, uint32_t value);
 typedef uint32_t ARMReadCPFunc(void *opaque, int cp_info,
@@ -71,6 +397,14 @@
 typedef struct CPUARMState {
     /* Regs for current mode.  */
     uint32_t regs[16];
+
+    /* Instruction counting "regs". */
+    uint32_t arm_instr_count[ARM_INSTRUCTIONS];
+    /* Instruction counting "regs". */
+    uint32_t arm_vfp_instr_count[ARM_VFP_INSTRUCTIONS];
+    /* Instruction counting "regs". */
+    uint32_t arm_thumb_instr_count[ARM_THUMB_INSTRUCTIONS];
+
     /* Frequently accessed CPSR bits are stored separately for efficiently.
        This contains all the other bits.  Use cpsr_{read,write} to access
        the whole CPSR.  */
@@ -446,4 +780,8 @@
         *flags |= (1 << 7);
 }
 
+extern const char const *arm_instr_names[];
+extern const char const *arm_vfp_instr_names[];
+extern const char const *arm_thumb_instr_names[];
+
 #endif
Index: target-arm/translate.c
===================================================================
--- target-arm/translate.c	(.../tags/qemu-0_10_4)	(revision 33)
+++ target-arm/translate.c	(.../trunk/qemu)	(revision 33)
@@ -31,6 +31,8 @@
 #include "tcg-op.h"
 #include "qemu-log.h"
 
+#include "instrumentation.h"
+
 #include "helpers.h"
 #define GEN_HELPER 1
 #include "helpers.h"
@@ -191,7 +193,39 @@
     dead_tmp(var);
 }
 
+typedef struct instr_counter_offsets {
+	uint32_t cpustate_offset;
+	TCGArg *tcg_offset[2];
+} instr_counter_offsets;
 
+static instr_counter_offsets instr_offsets;
+
+
+static inline void instr_count_inc_init(uint32_t offset,  int instr)
+{
+	if (!instrumentation_count_instructions) return;
+	instr_offsets.cpustate_offset = offset;
+	TCGv tmp = new_tmp();
+	tcg_gen_ld_i32(tmp, cpu_env, instr_offsets.cpustate_offset + sizeof(uint32_t) * instr);
+	instr_offsets.tcg_offset[0] = gen_opparam_ptr - 1;
+	tcg_gen_addi_i32(tmp, tmp, 1);
+	tcg_gen_st_i32(tmp, cpu_env, instr_offsets.cpustate_offset + sizeof(uint32_t) * instr);
+	instr_offsets.tcg_offset[1] = gen_opparam_ptr - 1;
+    dead_tmp(tmp);
+}
+
+/* Redirect the increment emitted by instr_count_inc_init() to counter 'instr'. */
+static inline void instr_count_inc(int instr)
+{
+	if (!instrumentation_count_instructions) return;
+	*(instr_offsets.tcg_offset[0]) = instr_offsets.cpustate_offset + sizeof(uint32_t) * instr;
+	*(instr_offsets.tcg_offset[1]) = instr_offsets.cpustate_offset + sizeof(uint32_t) * instr;
+}
+
+#define ARM_INSTRUCTION_COUNTER_OFFSET offsetof(CPUState, arm_instr_count)
+#define ARM_VFP_INSTRUCTION_COUNTER_OFFSET offsetof(CPUState, arm_vfp_instr_count)
+#define ARM_THUMB_INSTRUCTION_COUNTER_OFFSET offsetof(CPUState, arm_thumb_instr_count)
+
 /* Basic operations.  */
 #define gen_op_movl_T0_T1() tcg_gen_mov_i32(cpu_T[0], cpu_T[1])
 #define gen_op_movl_T0_im(im) tcg_gen_movi_i32(cpu_T[0], im)
@@ -235,6 +269,329 @@
 /* Set NZCV flags from the high 4 bits of var.  */
 #define gen_set_nzcv(var) gen_set_cpsr(var, CPSR_NZCV)
 
+/* string names for arm_instruction enum values */
+const char const *arm_instr_names[] = {
+		"b",
+		"bl",
+		"blx",
+		"bx",
+		"bxj",
+		"adc",
+		"add",
+		"and",
+		"bic",
+		"cmn",
+		"cmp",
+		"eor",
+		"mov",
+		"mvn",
+		"orr",
+		"rsb",
+		"rsc",
+		"sbc",
+		"sub",
+		"teq",
+		"tst",
+		"mul",
+		"muls",
+		"mla",
+		"mlas",
+		"smla<x><y>",
+		"smlal",
+		"smlals",
+		"smlal<x><y>",
+		"smlaw<y>",
+		"smuad",
+		"smusd",
+		"smlad",
+		"smlsd",
+		"smlald",
+		"smlsld",
+		"smmla",
+		"smmls",
+		"smmul",
+		"smul<x><y>",
+		"smull",
+		"smulls",
+		"smulw<y>",
+		"umaal",
+		"umlal",
+		"umlals",
+		"umull",
+		"umulls",
+		"qadd",
+		"qdadd",
+		"qadd16",
+		"qaddsubx",
+		"qsubaddx",
+		"qsub16",
+		"qadd8",
+		"qsub8",
+		"qsub",
+		"qdsub",
+		"sadd16",
+		"saddsubx",
+		"ssubaddx",
+		"ssub16",
+		"sadd8",
+		"ssub8",
+		"shadd16",
+		"shaddsubx",
+		"shsubaddx",
+		"shsub16",
+		"shadd8",
+		"shsub8",
+		"uadd16",
+		"uaddsubx",
+		"usubaddx",
+		"usub16",
+		"uadd8",
+		"usub8",
+		"uhadd16",
+		"uhaddsubx",
+		"uhsubaddx",
+		"uhsub16",
+		"uhadd8",
+		"uhsub8",
+		"uqadd16",
+		"uqaddsubx",
+		"uqsubaddx",
+		"uqsub16",
+		"uqadd8",
+		"uqsub8",
+		"sxtab16",
+		"sxtab",
+		"sxtah",
+		"sxtb16",
+		"sxtb",
+		"sxth",
+		"uxtab16",
+		"uxtab",
+		"uxtah",
+		"uxtb16",
+		"uxtb",
+		"uxth",
+		"clz",
+		"usad8",
+		"usada8",
+		"pkh",
+		"pkhbt",
+		"pkhtb",
+		"rev",
+		"rev16",
+		"revsh",
+		"sel",
+		"ssat",
+		"ssat16",
+		"usat",
+		"usat16",
+		"mrs",
+		"msr",
+		"cps",
+		"setend",
+		"ldr",
+		"ldrb",
+		"ldrbt",
+		"ldrd",
+		"ldrex",
+		"ldrh",
+		"ldrsb",
+		"ldrsh",
+		"ldrt",
+		"str",
+		"strb",
+		"strbt",
+		"strd",
+		"strex",
+		"strh",
+		"strt",
+		"ldm1", //see arm manual ARM DDI 0100I page A3-27
+		"ldm2",
+		"ldm3",
+		"stm1",
+		"stm2",
+		"swp",
+		"swpb",
+		"bkpt",
+		"swi",
+		"cdp",
+		"ldc",
+		"mcr",
+		"mcrr",
+		"mrc",
+		"mrrc",
+		"stc",
+		"pld",
+		"rfe",
+		"srs",
+		"mcrr2",
+		"mrrc2",
+		"stc2",
+		"ldc2",
+		"cdp2",
+		"mcr2",
+		"mrc2",
+		"coprocessor",
+		"unknown",
+		"not_instrumented",
+		"total_instructions"
+};
+
+const char const *arm_vfp_instr_names[] = { /* string names for arm_vfp_instruction enum values */
+		"fabsd",
+		"fabss",
+		"faddd",
+		"fadds",
+		"fcmpd",
+		"fcmps",
+		"fcmped",
+		"fcmpes",
+		"fcmpezd",
+		"fcmpezs",
+		"fcmpzd",
+		"fcmpzs",
+		"fcpyd",
+		"fcpys",
+		"fcvtds",
+		"fcvtsd",
+		"fdivd",
+		"fdivs",
+		"fldd",
+		"flds",
+		"fldmd",
+		"fldms",
+		"fldmx",
+		"fmacd",
+		"fmacs",
+		"fmdhr",
+		"fmdlr",
+		"fmdrr",
+		"fmrdh",
+		"fmrdl",
+		"fmrrd",
+		"fmrrs",
+		"fmrs",
+		"fmrx",
+		"fmscd",
+		"fmscs",
+		"fmsr",
+		"fmsrr",
+		"fmstat",
+		"fmuld",
+		"fmuls",
+		"fmxr",
+		"fnegd",
+		"fnegs",
+		"fnmacd",
+		"fnmacs",
+		"fnmscd",
+		"fnmscs",
+		"fnmuld",
+		"fnmuls",
+		"fsitod",
+		"fsitos",
+		"fsqrtd",
+		"fsqrts",
+		"fstd",
+		"fsts",
+		"fstmd",
+		"fstms",
+		"fstmx",
+		"fsubd",
+		"fsubs",
+		"ftosid",
+		"ftosis",
+		"ftouid",
+		"ftouis",
+		"fuitod",
+		"fuitos",
+		"unknown",
+		"not_instrumented",
+		"total_count"
+};
+
+/* string names for arm_thumb_instruction enum values */
+const char const *arm_thumb_instr_names[] = {
+		"adc",
+		"add1",
+		"add2",
+		"add3",
+		"add4",
+		"add5",
+		"add6",
+		"add7",
+		"and",
+		"asr1",
+		"asr2",
+		"b1",
+		"b2",
+		"bic",
+		"bkpt",
+		"bl",
+		"blx1",
+		"blx2",
+		"bx",
+		"cmn",
+		"cmp1",
+		"cmp2",
+		"cmp3",
+		"cps",
+		"cpy",
+		"eor",
+		"ldmia",
+		"ldr1",
+		"ldr2",
+		"ldr3",
+		"ldr4",
+		"ldrb1",
+		"ldrb2",
+		"ldrh1",
+		"ldrh2",
+		"ldrsb",
+		"ldrsh",
+		"lsl1",
+		"lsl2",
+		"lsr1",
+		"lsr2",
+		"mov1",
+		"mov2",
+		"mov3",
+		"mul",
+		"mvn",
+		"neg",
+		"orr",
+		"pop",
+		"push",
+		"rev",
+		"rev16",
+		"revsh",
+		"ror",
+		"sbc",
+		"setend",
+		"stmia",
+		"str1",
+		"str2",
+		"str3",
+		"strb1",
+		"strb2",
+		"strh1",
+		"strh2",
+		"sub1",
+		"sub2",
+		"sub3",
+		"sub4",
+		"swi",
+		"sxtb",
+		"sxth",
+		"tst",
+		"uxtb",
+		"uxth",
+		"unknown",
+		"not_instrumented",
+		"total_count",
+};
+
+
 static void gen_exception(int excp)
 {
     TCGv tmp = new_tmp();
@@ -580,6 +937,7 @@
 static void gen_arm_parallel_addsub(int op1, int op2, TCGv a, TCGv b)
 {
     TCGv_ptr tmp;
+    unsigned int instr_index = 0;
 
     switch (op1) {
 #define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp)
@@ -587,28 +945,37 @@
         tmp = tcg_temp_new_ptr();
         tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE));
         PAS_OP(s)
+        instr_index = ARM_INSTRUCTION_SADD16;
         break;
     case 5:
         tmp = tcg_temp_new_ptr();
         tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE));
         PAS_OP(u)
+        instr_index = ARM_INSTRUCTION_UADD16;
         break;
 #undef gen_pas_helper
 #define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b)
     case 2:
         PAS_OP(q);
+        instr_index = ARM_INSTRUCTION_QADD16;
         break;
     case 3:
         PAS_OP(sh);
+        instr_index = ARM_INSTRUCTION_SHADD16;
         break;
     case 6:
         PAS_OP(uq);
+        instr_index = ARM_INSTRUCTION_UQADD16;
         break;
     case 7:
         PAS_OP(uh);
+        instr_index = ARM_INSTRUCTION_UHADD16;
         break;
 #undef gen_pas_helper
     }
+    if (op2 == 7) instr_index += 5;
+    else instr_index += op2;
+    instr_count_inc(instr_index);
 }
 #undef PAS_OP
 
@@ -2693,6 +3060,14 @@
     dead_tmp(tmp);
 }
 
+/*
+#define inc_vfp_instr_counter(dp, index)\
+	if (dp)\
+		instr_count_inc(arm_vfp_instr_offsets, index);\
+	else\
+		instr_count_inc(arm_vfp_instr_offsets, index + 1)
+*/
+
 /* Disassemble a VFP instruction.  Returns nonzero if an error occured
    (ie. an undefined instruction).  */
 static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
@@ -2702,6 +3077,9 @@
     TCGv tmp;
     TCGv tmp2;
 
+    instr_count_inc_init(ARM_VFP_INSTRUCTION_COUNTER_OFFSET,
+    		ARM_VFP_INSTRUCTION_NOT_INSTRUMENTED);
+
     if (!arm_feature(env, ARM_FEATURE_VFP))
         return 1;
 
@@ -2724,6 +3102,8 @@
                 int size;
                 int pass;
 
+                instr_count_inc(ARM_VFP_INSTRUCTION_UNKNOWN);
+
                 VFP_DREG_N(rn, insn);
                 if (insn & 0xf)
                     return 1;
@@ -2770,6 +3150,8 @@
                         }
                         break;
                     case 2:
+                    	if (pass) instr_count_inc(ARM_VFP_INSTRUCTION_FMRDH);
+                    	else instr_count_inc(ARM_VFP_INSTRUCTION_FMRDL);
                         break;
                     }
                     store_reg(s, rd, tmp);
@@ -2801,6 +3183,8 @@
                             dead_tmp(tmp2);
                             break;
                         case 2:
+                        	if (pass) instr_count_inc(ARM_VFP_INSTRUCTION_FMDHR);
+                        	else instr_count_inc(ARM_VFP_INSTRUCTION_FMDLR);
                             break;
                         }
                         neon_store_reg(rn, pass, tmp);
@@ -2814,6 +3198,7 @@
                     /* vfp->arm */
                     if (insn & (1 << 21)) {
                         /* system register */
+                    	instr_count_inc(ARM_VFP_INSTRUCTION_FMRX);
                         rn >>= 1;
 
                         switch (rn) {
@@ -2841,6 +3226,7 @@
                             break;
                         case ARM_VFP_FPSCR:
                             if (rd == 15) {
+                            	instr_count_inc(ARM_VFP_INSTRUCTION_FMSTAT);
                                 tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
                                 tcg_gen_andi_i32(tmp, tmp, 0xf0000000);
                             } else {
@@ -2859,6 +3245,7 @@
                             return 1;
                         }
                     } else {
+                    	instr_count_inc(ARM_VFP_INSTRUCTION_FMRS);
                         gen_mov_F0_vreg(0, rn);
                         tmp = gen_vfp_mrs();
                     }
@@ -2873,6 +3260,7 @@
                     /* arm->vfp */
                     tmp = load_reg(s, rd);
                     if (insn & (1 << 21)) {
+                    	instr_count_inc(ARM_VFP_INSTRUCTION_FMXR);
                         rn >>= 1;
                         /* system register */
                         switch (rn) {
@@ -2900,6 +3288,7 @@
                             return 1;
                         }
                     } else {
+                    	instr_count_inc(ARM_VFP_INSTRUCTION_FMSR);
                         gen_vfp_msr(tmp);
                         gen_mov_vreg_F0(0, rn);
                     }
@@ -3023,44 +3412,54 @@
                 /* Perform the calculation.  */
                 switch (op) {
                 case 0: /* mac: fd + (fn * fm) */
+               		instr_count_inc(ARM_VFP_INSTRUCTION_FMACD + (1 - dp));
                     gen_vfp_mul(dp);
                     gen_mov_F1_vreg(dp, rd);
                     gen_vfp_add(dp);
                     break;
                 case 1: /* nmac: fd - (fn * fm) */
+               		instr_count_inc(ARM_VFP_INSTRUCTION_FNMACD + (1 - dp));
                     gen_vfp_mul(dp);
                     gen_vfp_neg(dp);
                     gen_mov_F1_vreg(dp, rd);
                     gen_vfp_add(dp);
                     break;
                 case 2: /* msc: -fd + (fn * fm) */
+               		instr_count_inc(ARM_VFP_INSTRUCTION_FMSCD + (1 - dp));
                     gen_vfp_mul(dp);
                     gen_mov_F1_vreg(dp, rd);
                     gen_vfp_sub(dp);
                     break;
                 case 3: /* nmsc: -fd - (fn * fm)  */
+               		instr_count_inc(ARM_VFP_INSTRUCTION_FNMSCD + (1 - dp));
                     gen_vfp_mul(dp);
                     gen_vfp_neg(dp);
                     gen_mov_F1_vreg(dp, rd);
                     gen_vfp_sub(dp);
                     break;
                 case 4: /* mul: fn * fm */
+               		instr_count_inc(ARM_VFP_INSTRUCTION_FMULD + (1 - dp));
                     gen_vfp_mul(dp);
                     break;
                 case 5: /* nmul: -(fn * fm) */
+               		instr_count_inc(ARM_VFP_INSTRUCTION_FNMULD + (1 - dp));
                     gen_vfp_mul(dp);
                     gen_vfp_neg(dp);
                     break;
                 case 6: /* add: fn + fm */
-                    gen_vfp_add(dp);
+               		instr_count_inc(ARM_VFP_INSTRUCTION_FADDD + (1 - dp));
+					gen_vfp_add(dp);
                     break;
                 case 7: /* sub: fn - fm */
+               		instr_count_inc(ARM_VFP_INSTRUCTION_FSUBD + (1 - dp));
                     gen_vfp_sub(dp);
                     break;
                 case 8: /* div: fn / fm */
+               		instr_count_inc(ARM_VFP_INSTRUCTION_FDIVD + (1 - dp));
                     gen_vfp_div(dp);
                     break;
                 case 14: /* fconst */
+               		instr_count_inc(ARM_VFP_INSTRUCTION_UNKNOWN);
                     if (!arm_feature(env, ARM_FEATURE_VFP3))
                       return 1;
 
@@ -3085,90 +3484,116 @@
                 case 15: /* extension space */
                     switch (rn) {
                     case 0: /* cpy */
+                   		instr_count_inc(ARM_VFP_INSTRUCTION_FCPYD + (1 - dp));
                         /* no-op */
                         break;
                     case 1: /* abs */
+                   		instr_count_inc(ARM_VFP_INSTRUCTION_FABSD + (1 - dp));
                         gen_vfp_abs(dp);
                         break;
                     case 2: /* neg */
+                   		instr_count_inc(ARM_VFP_INSTRUCTION_FNEGD + (1 - dp));
                         gen_vfp_neg(dp);
                         break;
                     case 3: /* sqrt */
+                   		instr_count_inc(ARM_VFP_INSTRUCTION_FSQRTD + (1 - dp));
                         gen_vfp_sqrt(dp);
                         break;
                     case 8: /* cmp */
+                   		instr_count_inc(ARM_VFP_INSTRUCTION_FCMPD + (1 - dp));
                         gen_vfp_cmp(dp);
                         break;
                     case 9: /* cmpe */
+                   		instr_count_inc(ARM_VFP_INSTRUCTION_FCMPED + (1 - dp));
                         gen_vfp_cmpe(dp);
                         break;
                     case 10: /* cmpz */
+                   		instr_count_inc(ARM_VFP_INSTRUCTION_FCMPZD + (1 - dp));
                         gen_vfp_cmp(dp);
                         break;
                     case 11: /* cmpez */
+                   		instr_count_inc(ARM_VFP_INSTRUCTION_FCMPEZD + (1 - dp));
                         gen_vfp_F1_ld0(dp);
                         gen_vfp_cmpe(dp);
                         break;
                     case 15: /* single<->double conversion */
-                        if (dp)
+                        if (dp) {
+                       		instr_count_inc(ARM_VFP_INSTRUCTION_FCVTSD);
                             gen_helper_vfp_fcvtsd(cpu_F0s, cpu_F0d, cpu_env);
-                        else
+                        }
+                        else {
+                       		instr_count_inc(ARM_VFP_INSTRUCTION_FCVTDS);
                             gen_helper_vfp_fcvtds(cpu_F0d, cpu_F0s, cpu_env);
+                        }
                         break;
                     case 16: /* fuito */
+                   		instr_count_inc(ARM_VFP_INSTRUCTION_FUITOD + (1 - dp));
                         gen_vfp_uito(dp);
                         break;
                     case 17: /* fsito */
+                   		instr_count_inc(ARM_VFP_INSTRUCTION_FSITOD + (1 - dp));
                         gen_vfp_sito(dp);
                         break;
                     case 20: /* fshto */
+                   		instr_count_inc(ARM_VFP_INSTRUCTION_UNKNOWN);
                         if (!arm_feature(env, ARM_FEATURE_VFP3))
                           return 1;
                         gen_vfp_shto(dp, 16 - rm);
                         break;
                     case 21: /* fslto */
+                   		instr_count_inc(ARM_VFP_INSTRUCTION_UNKNOWN);
                         if (!arm_feature(env, ARM_FEATURE_VFP3))
                           return 1;
                         gen_vfp_slto(dp, 32 - rm);
                         break;
                     case 22: /* fuhto */
+                   		instr_count_inc(ARM_VFP_INSTRUCTION_UNKNOWN);
                         if (!arm_feature(env, ARM_FEATURE_VFP3))
                           return 1;
                         gen_vfp_uhto(dp, 16 - rm);
                         break;
                     case 23: /* fulto */
+                   		instr_count_inc(ARM_VFP_INSTRUCTION_UNKNOWN);
                         if (!arm_feature(env, ARM_FEATURE_VFP3))
                           return 1;
                         gen_vfp_ulto(dp, 32 - rm);
                         break;
                     case 24: /* ftoui */
+                   		instr_count_inc(ARM_VFP_INSTRUCTION_FTOUID + (1 - dp));
                         gen_vfp_toui(dp);
                         break;
                     case 25: /* ftouiz */
+                   		instr_count_inc(ARM_VFP_INSTRUCTION_UNKNOWN);
                         gen_vfp_touiz(dp);
                         break;
                     case 26: /* ftosi */
+                   		instr_count_inc(ARM_VFP_INSTRUCTION_FTOSID + (1 - dp));
                         gen_vfp_tosi(dp);
                         break;
                     case 27: /* ftosiz */
+                   		instr_count_inc(ARM_VFP_INSTRUCTION_UNKNOWN);
                         gen_vfp_tosiz(dp);
                         break;
                     case 28: /* ftosh */
+                   		instr_count_inc(ARM_VFP_INSTRUCTION_UNKNOWN);
                         if (!arm_feature(env, ARM_FEATURE_VFP3))
                           return 1;
                         gen_vfp_tosh(dp, 16 - rm);
                         break;
                     case 29: /* ftosl */
+                   		instr_count_inc(ARM_VFP_INSTRUCTION_UNKNOWN);
                         if (!arm_feature(env, ARM_FEATURE_VFP3))
                           return 1;
                         gen_vfp_tosl(dp, 32 - rm);
                         break;
                     case 30: /* ftouh */
+                   		instr_count_inc(ARM_VFP_INSTRUCTION_UNKNOWN);
                         if (!arm_feature(env, ARM_FEATURE_VFP3))
                           return 1;
                         gen_vfp_touh(dp, 16 - rm);
                         break;
                     case 31: /* ftoul */
+                   		instr_count_inc(ARM_VFP_INSTRUCTION_UNKNOWN);
                         if (!arm_feature(env, ARM_FEATURE_VFP3))
                           return 1;
                         gen_vfp_toul(dp, 32 - rm);
@@ -3247,6 +3672,7 @@
             if (insn & ARM_CP_RW_BIT) {
                 /* vfp->arm */
                 if (dp) {
+                	instr_count_inc(ARM_VFP_INSTRUCTION_FMRRD);
                     gen_mov_F0_vreg(0, rm * 2);
                     tmp = gen_vfp_mrs();
                     store_reg(s, rd, tmp);
@@ -3254,6 +3680,7 @@
                     tmp = gen_vfp_mrs();
                     store_reg(s, rn, tmp);
                 } else {
+                	instr_count_inc(ARM_VFP_INSTRUCTION_FMRRS);
                     gen_mov_F0_vreg(0, rm);
                     tmp = gen_vfp_mrs();
                     store_reg(s, rn, tmp);
@@ -3264,6 +3691,7 @@
             } else {
                 /* arm->vfp */
                 if (dp) {
+                	instr_count_inc(ARM_VFP_INSTRUCTION_FMDRR);
                     tmp = load_reg(s, rd);
                     gen_vfp_msr(tmp);
                     gen_mov_vreg_F0(0, rm * 2);
@@ -3271,6 +3699,7 @@
                     gen_vfp_msr(tmp);
                     gen_mov_vreg_F0(0, rm * 2 + 1);
                 } else {
+                	instr_count_inc(ARM_VFP_INSTRUCTION_FMSRR);
                     tmp = load_reg(s, rn);
                     gen_vfp_msr(tmp);
                     gen_mov_vreg_F0(0, rm);
@@ -3298,9 +3727,11 @@
                     offset = -offset;
                 gen_op_addl_T1_im(offset);
                 if (insn & (1 << 20)) {
+                	instr_count_inc(ARM_VFP_INSTRUCTION_FLDD + (1 - dp));
                     gen_vfp_ld(s, dp);
                     gen_mov_vreg_F0(dp, rd);
                 } else {
+                	instr_count_inc(ARM_VFP_INSTRUCTION_FSTD + (1 - dp));
                     gen_mov_F0_vreg(dp, rd);
                     gen_vfp_st(s, dp);
                 }
@@ -3321,10 +3752,12 @@
                 for (i = 0; i < n; i++) {
                     if (insn & ARM_CP_RW_BIT) {
                         /* load */
+                    	instr_count_inc(ARM_VFP_INSTRUCTION_FLDMD + (1 - dp));
                         gen_vfp_ld(s, dp);
                         gen_mov_vreg_F0(dp, rd + i);
                     } else {
                         /* store */
+                    	instr_count_inc(ARM_VFP_INSTRUCTION_FSTMD + (1 - dp));
                         gen_mov_F0_vreg(dp, rd + i);
                         gen_vfp_st(s, dp);
                     }
@@ -5697,6 +6130,7 @@
 static void disas_arm_insn(CPUState * env, DisasContext *s)
 {
     unsigned int cond, insn, val, op1, i, shift, rm, rs, rn, rd, sh;
+    unsigned int instr_index = 0;
     TCGv tmp;
     TCGv tmp2;
     TCGv tmp3;
@@ -5706,14 +6140,18 @@
     insn = ldl_code(s->pc);
     s->pc += 4;
 
+    instr_count_inc_init(ARM_INSTRUCTION_COUNTER_OFFSET,
+    		ARM_INSTRUCTION_NOT_INSTRUMENTED);
+
     /* M variants do not implement ARM mode.  */
     if (IS_M(env))
         goto illegal_op;
     cond = insn >> 28;
-    if (cond == 0xf){
+    if (cond == 0xf) {
         /* Unconditional instructions.  */
         if (((insn >> 25) & 7) == 1) {
             /* NEON Data processing.  */
+        	instr_count_inc(ARM_INSTRUCTION_UNKNOWN);
             if (!arm_feature(env, ARM_FEATURE_NEON))
                 goto illegal_op;
 
@@ -5723,6 +6161,7 @@
         }
         if ((insn & 0x0f100000) == 0x04000000) {
             /* NEON load/store.  */
+        	instr_count_inc(ARM_INSTRUCTION_UNKNOWN);
             if (!arm_feature(env, ARM_FEATURE_NEON))
                 goto illegal_op;
 
@@ -5734,6 +6173,7 @@
             return; /* PLD */
         else if ((insn & 0x0ffffdff) == 0x01010000) {
             ARCH(6);
+        	instr_count_inc(ARM_INSTRUCTION_SETEND);
             /* setend */
             if (insn & (1 << 9)) {
                 /* BE8 mode not implemented.  */
@@ -5744,11 +6184,13 @@
             switch ((insn >> 4) & 0xf) {
             case 1: /* clrex */
                 ARCH(6K);
+            	instr_count_inc(ARM_INSTRUCTION_UNKNOWN);
                 gen_helper_clrex(cpu_env);
                 return;
             case 4: /* dsb */
             case 5: /* dmb */
             case 6: /* isb */
+            	instr_count_inc(ARM_INSTRUCTION_UNKNOWN);
                 ARCH(7);
                 /* We don't emulate caches so these are a no-op.  */
                 return;
@@ -5758,6 +6200,7 @@
         } else if ((insn & 0x0e5fffe0) == 0x084d0500) {
             /* srs */
             uint32_t offset;
+            instr_count_inc(ARM_INSTRUCTION_SRS);
             if (IS_USER(s))
                 goto illegal_op;
             ARCH(6);
@@ -5808,6 +6251,7 @@
             uint32_t offset;
             if (IS_USER(s))
                 goto illegal_op;
+            instr_count_inc(ARM_INSTRUCTION_RFE);
             ARCH(6);
             rn = (insn >> 16) & 0xf;
             addr = load_reg(s, rn);
@@ -5844,7 +6288,7 @@
         } else if ((insn & 0x0e000000) == 0x0a000000) {
             /* branch link and change to thumb (blx <offset>) */
             int32_t offset;
-
+            instr_count_inc(ARM_INSTRUCTION_BLX);
             val = (uint32_t)s->pc;
             tmp = new_tmp();
             tcg_gen_movi_i32(tmp, val);
@@ -5858,6 +6302,7 @@
             gen_bx_im(s, val);
             return;
         } else if ((insn & 0x0e000f00) == 0x0c000100) {
+        	instr_count_inc(ARM_INSTRUCTION_UNKNOWN);
             if (arm_feature(env, ARM_FEATURE_IWMMXT)) {
                 /* iWMMXt register transfer.  */
                 if (env->cp15.c15_cpar & (1 << 1))
@@ -5865,13 +6310,16 @@
                         return;
             }
         } else if ((insn & 0x0fe00000) == 0x0c400000) {
+        	instr_count_inc(ARM_INSTRUCTION_UNKNOWN);
             /* Coprocessor double register transfer.  */
         } else if ((insn & 0x0f000010) == 0x0e000010) {
+        	instr_count_inc(ARM_INSTRUCTION_UNKNOWN);
             /* Additional coprocessor register transfer.  */
         } else if ((insn & 0x0ff10020) == 0x01000000) {
             uint32_t mask;
             uint32_t val;
             /* cps (privileged) */
+            instr_count_inc(ARM_INSTRUCTION_CPS);
             if (IS_USER(s))
                 return;
             mask = val = 0;
@@ -5911,10 +6359,12 @@
             val = ((insn >> 4) & 0xf000) | (insn & 0xfff);
             if ((insn & (1 << 22)) == 0) {
                 /* MOVW */
+            	instr_count_inc(ARM_INSTRUCTION_UNKNOWN);
                 tmp = new_tmp();
                 tcg_gen_movi_i32(tmp, val);
             } else {
                 /* MOVT */
+            	instr_count_inc(ARM_INSTRUCTION_UNKNOWN);
                 tmp = load_reg(s, rd);
                 tcg_gen_ext16u_i32(tmp, tmp);
                 tcg_gen_ori_i32(tmp, tmp, val << 16);
@@ -5924,9 +6374,11 @@
             if (((insn >> 12) & 0xf) != 0xf)
                 goto illegal_op;
             if (((insn >> 16) & 0xf) == 0) {
+            	instr_count_inc(ARM_INSTRUCTION_UNKNOWN);
                 gen_nop_hint(s, insn & 0xff);
             } else {
                 /* CPSR = immediate */
+            	instr_count_inc(ARM_INSTRUCTION_MSR);
                 val = insn & 0xff;
                 shift = ((insn >> 8) & 0xf) * 2;
                 if (shift)
@@ -5947,12 +6399,14 @@
         case 0x0: /* move program status register */
             if (op1 & 1) {
                 /* PSR = reg */
+                instr_count_inc(ARM_INSTRUCTION_MSR);
                 gen_movl_T0_reg(s, rm);
                 i = ((op1 & 2) != 0);
                 if (gen_set_psr_T0(s, msr_mask(env, s, (insn >> 16) & 0xf, i), i))
                     goto illegal_op;
             } else {
                 /* reg = PSR */
+                instr_count_inc(ARM_INSTRUCTION_MRS);
                 rd = (insn >> 12) & 0xf;
                 if (op1 & 2) {
                     if (IS_USER(s))
@@ -5968,10 +6422,12 @@
         case 0x1:
             if (op1 == 1) {
                 /* branch/exchange thumb (bx).  */
+            	instr_count_inc(ARM_INSTRUCTION_BX);
                 tmp = load_reg(s, rm);
                 gen_bx(s, tmp);
             } else if (op1 == 3) {
                 /* clz */
+            	instr_count_inc(ARM_INSTRUCTION_CLZ);
                 rd = (insn >> 12) & 0xf;
                 tmp = load_reg(s, rm);
                 gen_helper_clz(tmp, tmp);
@@ -5982,6 +6438,7 @@
             break;
         case 0x2:
             if (op1 == 1) {
+            	instr_count_inc(ARM_INSTRUCTION_BXJ);
                 ARCH(5J); /* bxj */
                 /* Trivial implementation equivalent to bx.  */
                 tmp = load_reg(s, rm);
@@ -5993,7 +6450,7 @@
         case 0x3:
             if (op1 != 1)
               goto illegal_op;
-
+            instr_count_inc(ARM_INSTRUCTION_BLX); //TODO: add own counter
             /* branch link/exchange thumb (blx) */
             tmp = load_reg(s, rm);
             tmp2 = new_tmp();
@@ -6006,16 +6463,24 @@
             rn = (insn >> 16) & 0xf;
             tmp = load_reg(s, rm);
             tmp2 = load_reg(s, rn);
-            if (op1 & 2)
+            if (op1 & 2) {
                 gen_helper_double_saturate(tmp2, tmp2);
-            if (op1 & 1)
+                if (op1 & 1) instr_count_inc(ARM_INSTRUCTION_QDSUB);
+                else instr_count_inc(ARM_INSTRUCTION_QDADD);
+            }
+            if (op1 & 1) {
                 gen_helper_sub_saturate(tmp, tmp, tmp2);
-            else
+                instr_count_inc(ARM_INSTRUCTION_QSUB);
+            }
+            else {
                 gen_helper_add_saturate(tmp, tmp, tmp2);
+                instr_count_inc(ARM_INSTRUCTION_QADD);
+            }
             dead_tmp(tmp2);
             store_reg(s, rd, tmp);
             break;
         case 7: /* bkpt */
+        	instr_count_inc(ARM_INSTRUCTION_BKPT);
             gen_set_condexec(s);
             gen_set_pc_im(s->pc - 4);
             gen_exception(EXCP_BKPT);
@@ -6041,18 +6506,22 @@
                 tmp = new_tmp();
                 tcg_gen_trunc_i64_i32(tmp, tmp64);
                 if ((sh & 2) == 0) {
+                	instr_count_inc(ARM_INSTRUCTION_SMLAWY);
                     tmp2 = load_reg(s, rn);
                     gen_helper_add_setq(tmp, tmp, tmp2);
                     dead_tmp(tmp2);
                 }
+                else instr_count_inc(ARM_INSTRUCTION_SMULWY);
                 store_reg(s, rd, tmp);
             } else {
                 /* 16 * 16 */
+            	if (op1 == 3) instr_count_inc(ARM_INSTRUCTION_SMULXY);
                 tmp = load_reg(s, rm);
                 tmp2 = load_reg(s, rs);
                 gen_mulxy(tmp, tmp2, sh & 2, sh & 4);
                 dead_tmp(tmp2);
                 if (op1 == 2) {
+                	instr_count_inc(ARM_INSTRUCTION_SMLALXY);
                     tmp64 = tcg_temp_new_i64();
                     tcg_gen_ext_i32_i64(tmp64, tmp);
                     dead_tmp(tmp);
@@ -6060,6 +6529,7 @@
                     gen_storeq_reg(s, rn, rd, tmp64);
                 } else {
                     if (op1 == 0) {
+                    	instr_count_inc(ARM_INSTRUCTION_SMLAXY);
                         tmp2 = load_reg(s, rn);
                         gen_helper_add_setq(tmp, tmp, tmp2);
                         dead_tmp(tmp2);
@@ -6111,18 +6581,21 @@
         rd = (insn >> 12) & 0xf;
         switch(op1) {
         case 0x00:
+            instr_count_inc(ARM_INSTRUCTION_AND);
             gen_op_andl_T0_T1();
             gen_movl_reg_T0(s, rd);
             if (logic_cc)
                 gen_op_logic_T0_cc();
             break;
         case 0x01:
+            instr_count_inc(ARM_INSTRUCTION_EOR);
             gen_op_xorl_T0_T1();
             gen_movl_reg_T0(s, rd);
             if (logic_cc)
                 gen_op_logic_T0_cc();
             break;
         case 0x02:
+            instr_count_inc(ARM_INSTRUCTION_SUB);
             if (set_cc && rd == 15) {
                 /* SUBS r15, ... is used for exception return.  */
                 if (IS_USER(s))
@@ -6131,20 +6604,22 @@
                 gen_exception_return(s);
             } else {
                 if (set_cc)
-                    gen_op_subl_T0_T1_cc();
+					gen_op_subl_T0_T1_cc();
                 else
                     gen_op_subl_T0_T1();
                 gen_movl_reg_T0(s, rd);
             }
             break;
         case 0x03:
-            if (set_cc)
-                gen_op_rsbl_T0_T1_cc();
+        	instr_count_inc(ARM_INSTRUCTION_RSB);
+        	if (set_cc)
+				gen_op_rsbl_T0_T1_cc();
             else
                 gen_op_rsbl_T0_T1();
             gen_movl_reg_T0(s, rd);
             break;
         case 0x04:
+        	instr_count_inc(ARM_INSTRUCTION_ADD);
             if (set_cc)
                 gen_op_addl_T0_T1_cc();
             else
@@ -6152,13 +6627,15 @@
             gen_movl_reg_T0(s, rd);
             break;
         case 0x05:
-            if (set_cc)
+        	instr_count_inc(ARM_INSTRUCTION_ADC);
+        	if (set_cc)
                 gen_op_adcl_T0_T1_cc();
             else
                 gen_adc_T0_T1();
             gen_movl_reg_T0(s, rd);
             break;
         case 0x06:
+        	instr_count_inc(ARM_INSTRUCTION_SBC);
             if (set_cc)
                 gen_op_sbcl_T0_T1_cc();
             else
@@ -6166,6 +6643,7 @@
             gen_movl_reg_T0(s, rd);
             break;
         case 0x07:
+        	instr_count_inc(ARM_INSTRUCTION_RSC);
             if (set_cc)
                 gen_op_rscl_T0_T1_cc();
             else
@@ -6173,34 +6651,40 @@
             gen_movl_reg_T0(s, rd);
             break;
         case 0x08:
+        	instr_count_inc(ARM_INSTRUCTION_TST);
             if (set_cc) {
                 gen_op_andl_T0_T1();
                 gen_op_logic_T0_cc();
             }
             break;
         case 0x09:
+        	instr_count_inc(ARM_INSTRUCTION_TEQ);
             if (set_cc) {
                 gen_op_xorl_T0_T1();
                 gen_op_logic_T0_cc();
             }
             break;
         case 0x0a:
+        	instr_count_inc(ARM_INSTRUCTION_CMP);
             if (set_cc) {
                 gen_op_subl_T0_T1_cc();
             }
             break;
         case 0x0b:
+        	instr_count_inc(ARM_INSTRUCTION_CMN);
             if (set_cc) {
                 gen_op_addl_T0_T1_cc();
             }
             break;
         case 0x0c:
+        	instr_count_inc(ARM_INSTRUCTION_ORR);
             gen_op_orl_T0_T1();
             gen_movl_reg_T0(s, rd);
             if (logic_cc)
                 gen_op_logic_T0_cc();
             break;
         case 0x0d:
+        	instr_count_inc(ARM_INSTRUCTION_MOV);
             if (logic_cc && rd == 15) {
                 /* MOVS r15, ... is used for exception return.  */
                 if (IS_USER(s))
@@ -6214,6 +6698,7 @@
             }
             break;
         case 0x0e:
+        	instr_count_inc(ARM_INSTRUCTION_BIC);
             gen_op_bicl_T0_T1();
             gen_movl_reg_T0(s, rd);
             if (logic_cc)
@@ -6221,6 +6706,7 @@
             break;
         default:
         case 0x0f:
+        	instr_count_inc(ARM_INSTRUCTION_MVN);
             gen_op_notl_T1();
             gen_movl_reg_T1(s, rd);
             if (logic_cc)
@@ -6245,43 +6731,61 @@
                     switch (op1) {
                     case 0: case 1: case 2: case 3: case 6:
                         /* 32 bit mul */
+                    	instr_index = ARM_INSTRUCTION_MUL;
                         tmp = load_reg(s, rs);
                         tmp2 = load_reg(s, rm);
                         tcg_gen_mul_i32(tmp, tmp, tmp2);
                         dead_tmp(tmp2);
                         if (insn & (1 << 22)) {
                             /* Subtract (mls) */
+                        	instr_count_inc(ARM_INSTRUCTION_UNKNOWN);
                             ARCH(6T2);
                             tmp2 = load_reg(s, rn);
                             tcg_gen_sub_i32(tmp, tmp2, tmp);
                             dead_tmp(tmp2);
                         } else if (insn & (1 << 21)) {
                             /* Add */
+                        	instr_index = ARM_INSTRUCTION_MLA;
                             tmp2 = load_reg(s, rn);
                             tcg_gen_add_i32(tmp, tmp, tmp2);
                             dead_tmp(tmp2);
                         }
-                        if (insn & (1 << 20))
+                        if (insn & (1 << 20)) {
                             gen_logic_CC(tmp);
+                        	instr_index++; /* MULS and MLAS */
+                        }
+                        instr_count_inc(instr_index);
                         store_reg(s, rd, tmp);
                         break;
                     default:
                         /* 64 bit mul */
                         tmp = load_reg(s, rs);
                         tmp2 = load_reg(s, rm);
-                        if (insn & (1 << 22))
+                        if (insn & (1 << 22)) {
                             tmp64 = gen_muls_i64_i32(tmp, tmp2);
-                        else
+                            instr_index = ARM_INSTRUCTION_SMULL;
+                        }
+                        else {
                             tmp64 = gen_mulu_i64_i32(tmp, tmp2);
-                        if (insn & (1 << 21)) /* mult accumulate */
+                            instr_index = ARM_INSTRUCTION_UMULL;
+                        }
+                        if (insn & (1 << 21)) {
+                        	/* mult accumulate */
                             gen_addq(s, tmp64, rn, rd);
+                            if (insn & (1 << 22))
+                            	instr_index = ARM_INSTRUCTION_SMLAL;
+                            else instr_index = ARM_INSTRUCTION_UMLAL;
+                        }
                         if (!(insn & (1 << 23))) { /* double accumulate */
                             ARCH(6);
                             gen_addq_lo(s, tmp64, rn);
                             gen_addq_lo(s, tmp64, rd);
                         }
-                        if (insn & (1 << 20))
+                        if (insn & (1 << 20)) {
                             gen_logicq_cc(tmp64);
+                            instr_index++; /* SMULLS, UMULLS, SMLALS, UMLALS */
+                        }
+                        instr_count_inc(instr_index);
                         gen_storeq_reg(s, rn, rd, tmp64);
                         break;
                     }
@@ -6300,6 +6804,7 @@
                         if (insn & (1 << 20)) {
                             gen_helper_mark_exclusive(cpu_env, cpu_T[1]);
+                            instr_count_inc(ARM_INSTRUCTION_LDREX); /* must precede the switch: a statement between 'switch (...) {' and the first case label is never executed. TODO: add counters for all ldrex types */
                             switch (op1) {
                             case 0: /* ldrex */
                                 tmp = gen_ld32(addr, IS_USER(s));
                                 break;
@@ -6327,6 +6832,7 @@
                             tcg_gen_brcondi_i32(TCG_COND_NE, cpu_T[0],
                                                 0, label);
                             tmp = load_reg(s,rm);
+                        	instr_count_inc(ARM_INSTRUCTION_STREX);
                             switch (op1) {
                             case 0:  /*  strex */
                                 gen_st32(tmp, addr, IS_USER(s));
@@ -6359,9 +6865,11 @@
                         addr = load_reg(s, rn);
                         tmp = load_reg(s, rm);
                         if (insn & (1 << 22)) {
-                            tmp2 = gen_ld8u(addr, IS_USER(s));
+                        	instr_count_inc(ARM_INSTRUCTION_SWPB);
+                        	tmp2 = gen_ld8u(addr, IS_USER(s));
                             gen_st8(tmp, addr, IS_USER(s));
                         } else {
+                        	instr_count_inc(ARM_INSTRUCTION_SWP);
                             tmp2 = gen_ld32(addr, IS_USER(s));
                             gen_st32(tmp, addr, IS_USER(s));
                         }
@@ -6383,13 +6891,16 @@
                     /* load */
                     switch(sh) {
                     case 1:
+                        instr_count_inc(ARM_INSTRUCTION_LDRH);
                         tmp = gen_ld16u(addr, IS_USER(s));
                         break;
                     case 2:
+                        instr_count_inc(ARM_INSTRUCTION_LDRSB);
                         tmp = gen_ld8s(addr, IS_USER(s));
                         break;
                     default:
                     case 3:
+                        instr_count_inc(ARM_INSTRUCTION_LDRSH);
                         tmp = gen_ld16s(addr, IS_USER(s));
                         break;
                     }
@@ -6398,6 +6909,7 @@
                     /* doubleword */
                     if (sh & 1) {
                         /* store */
+                        instr_count_inc(ARM_INSTRUCTION_STRD);
                         tmp = load_reg(s, rd);
                         gen_st32(tmp, addr, IS_USER(s));
                         tcg_gen_addi_i32(addr, addr, 4);
@@ -6406,6 +6918,7 @@
                         load = 0;
                     } else {
                         /* load */
+                        instr_count_inc(ARM_INSTRUCTION_LDRD);
                         tmp = gen_ld32(addr, IS_USER(s));
                         store_reg(s, rd, tmp);
                         tcg_gen_addi_i32(addr, addr, 4);
@@ -6417,6 +6930,7 @@
                 } else {
                     /* store */
                     tmp = load_reg(s, rd);
+                    instr_count_inc(ARM_INSTRUCTION_STRH);
                     gen_st16(tmp, addr, IS_USER(s));
                     load = 0;
                 }
@@ -6454,6 +6968,7 @@
                 rs = (insn >> 8) & 0xf;
                 switch ((insn >> 23) & 3) {
                 case 0: /* Parallel add/subtract.  */
+                	instr_count_inc(ARM_INSTRUCTION_UNKNOWN);
                     op1 = (insn >> 20) & 7;
                     tmp = load_reg(s, rn);
                     tmp2 = load_reg(s, rm);
@@ -6472,6 +6987,7 @@
                         shift = (insn >> 7) & 0x1f;
                         if (insn & (1 << 6)) {
                             /* pkhtb */
+                        	instr_count_inc(ARM_INSTRUCTION_PKHTB);
                             if (shift == 0)
                                 shift = 31;
                             tcg_gen_sari_i32(tmp2, tmp2, shift);
@@ -6479,6 +6995,7 @@
                             tcg_gen_ext16u_i32(tmp2, tmp2);
                         } else {
                             /* pkhbt */
+                        	instr_count_inc(ARM_INSTRUCTION_PKHBT);
                             if (shift)
                                 tcg_gen_shli_i32(tmp2, tmp2, shift);
                             tcg_gen_ext16u_i32(tmp, tmp);
@@ -6500,10 +7017,14 @@
                         }
                         sh = (insn >> 16) & 0x1f;
                         if (sh != 0) {
-                            if (insn & (1 << 22))
+                            if (insn & (1 << 22)) {
+                            	instr_count_inc(ARM_INSTRUCTION_USAT);
                                 gen_helper_usat(tmp, tmp, tcg_const_i32(sh));
-                            else
+                            }
+                            else {
+                            	instr_count_inc(ARM_INSTRUCTION_SSAT);
                                 gen_helper_ssat(tmp, tmp, tcg_const_i32(sh));
+                            }
                         }
                         store_reg(s, rd, tmp);
                     } else if ((insn & 0x00300fe0) == 0x00200f20) {
@@ -6511,14 +7032,19 @@
                         tmp = load_reg(s, rm);
                         sh = (insn >> 16) & 0x1f;
                         if (sh != 0) {
-                            if (insn & (1 << 22))
+                            if (insn & (1 << 22)) {
+                            	instr_count_inc(ARM_INSTRUCTION_USAT16);
                                 gen_helper_usat16(tmp, tmp, tcg_const_i32(sh));
-                            else
+                            }
+                            else {
+                            	instr_count_inc(ARM_INSTRUCTION_SSAT16);
                                 gen_helper_ssat16(tmp, tmp, tcg_const_i32(sh));
+                            }
                         }
                         store_reg(s, rd, tmp);
                     } else if ((insn & 0x00700fe0) == 0x00000fa0) {
                         /* Select bytes.  */
+                    	instr_count_inc(ARM_INSTRUCTION_SEL);
                         tmp = load_reg(s, rn);
                         tmp2 = load_reg(s, rm);
                         tmp3 = new_tmp();
@@ -6536,12 +7062,30 @@
                             tcg_gen_rori_i32(tmp, tmp, shift * 8);
                         op1 = (insn >> 20) & 7;
                         switch (op1) {
-                        case 0: gen_sxtb16(tmp);  break;
-                        case 2: gen_sxtb(tmp);    break;
-                        case 3: gen_sxth(tmp);    break;
-                        case 4: gen_uxtb16(tmp);  break;
-                        case 6: gen_uxtb(tmp);    break;
-                        case 7: gen_uxth(tmp);    break;
+                        case 0:
+                        	instr_index = ARM_INSTRUCTION_SXTB16;
+                        	gen_sxtb16(tmp);
+                        	break;
+                        case 2:
+                        	instr_index = ARM_INSTRUCTION_SXTB;
+                        	gen_sxtb(tmp);
+                        	break;
+                        case 3:
+                        	instr_index = ARM_INSTRUCTION_SXTH;
+                        	gen_sxth(tmp);
+                        	break;
+                        case 4:
+                        	instr_index = ARM_INSTRUCTION_UXTB16;
+                        	gen_uxtb16(tmp);
+                        	break;
+                        case 6:
+                        	instr_index = ARM_INSTRUCTION_UXTB;
+                        	gen_uxtb(tmp);
+                        	break;
+                        case 7:
+                        	instr_index = ARM_INSTRUCTION_UXTH;
+                        	gen_uxth(tmp);
+                        	break;
                         default: goto illegal_op;
                         }
                         if (rn != 15) {
@@ -6552,23 +7096,30 @@
                                 tcg_gen_add_i32(tmp, tmp, tmp2);
                                 dead_tmp(tmp2);
                             }
+                            instr_index -= 3; /* add variants */
                         }
+                        instr_count_inc(instr_index);
                         store_reg(s, rd, tmp);
                     } else if ((insn & 0x003f0f60) == 0x003f0f20) {
                         /* rev */
                         tmp = load_reg(s, rm);
                         if (insn & (1 << 22)) {
                             if (insn & (1 << 7)) {
+                            	instr_count_inc(ARM_INSTRUCTION_REVSH);
                                 gen_revsh(tmp);
                             } else {
                                 ARCH(6T2);
                                 gen_helper_rbit(tmp, tmp);
                             }
                         } else {
-                            if (insn & (1 << 7))
+                            if (insn & (1 << 7)) {
+                            	instr_count_inc(ARM_INSTRUCTION_REV16);
                                 gen_rev16(tmp);
-                            else
+                            }
+                            else {
+                            	instr_count_inc(ARM_INSTRUCTION_REV);
                                 tcg_gen_bswap_i32(tmp, tmp);
+                            }
                         }
                         store_reg(s, rd, tmp);
                     } else {
@@ -6590,11 +7141,16 @@
                             tmp2 = load_reg(s, rd);
                             if (insn & (1 << 6)) {
                                 tcg_gen_sub_i32(tmp, tmp, tmp2);
+                                instr_count_inc(ARM_INSTRUCTION_SMMLS);
                             } else {
                                 tcg_gen_add_i32(tmp, tmp, tmp2);
+                                instr_count_inc(ARM_INSTRUCTION_SMMLA);
                             }
                             dead_tmp(tmp2);
                         }
+                        else {
+                        	instr_count_inc(ARM_INSTRUCTION_SMMUL);
+                        }
                         store_reg(s, rn, tmp);
                     } else {
                         if (insn & (1 << 5))
@@ -6603,8 +7159,10 @@
                         /* This addition cannot overflow.  */
                         if (insn & (1 << 6)) {
                             tcg_gen_sub_i32(tmp, tmp, tmp2);
+                            instr_index = 1;
                         } else {
                             tcg_gen_add_i32(tmp, tmp, tmp2);
+                            instr_index = 0;
                         }
                         dead_tmp(tmp2);
                         if (insn & (1 << 22)) {
@@ -6614,6 +7172,8 @@
                             dead_tmp(tmp);
                             gen_addq(s, tmp64, rd, rn);
                             gen_storeq_reg(s, rd, rn, tmp64);
+                            instr_index += ARM_INSTRUCTION_SMLALD;
+                            instr_count_inc(instr_index);
                         } else {
                             /* smuad, smusd, smlad, smlsd */
                             if (rd != 15)
@@ -6621,8 +7181,11 @@
                                 tmp2 = load_reg(s, rd);
                                 gen_helper_add_setq(tmp, tmp, tmp2);
                                 dead_tmp(tmp2);
+                                instr_index += 2; /* SMLAD, SMLSD */
                               }
                             store_reg(s, rn, tmp);
+                            instr_index += ARM_INSTRUCTION_SMUAD;
+                            instr_count_inc(instr_index);
                         }
                     }
                     break;
@@ -6639,12 +7202,15 @@
                             tmp2 = load_reg(s, rd);
                             tcg_gen_add_i32(tmp, tmp, tmp2);
                             dead_tmp(tmp2);
+                            instr_count_inc(ARM_INSTRUCTION_USADA8);
                         }
+                        else instr_count_inc(ARM_INSTRUCTION_USAD8);
                         store_reg(s, rn, tmp);
                         break;
                     case 0x20: case 0x24: case 0x28: case 0x2c:
                         /* Bitfield insert/clear.  */
                         ARCH(6T2);
+                        instr_count_inc(ARM_INSTRUCTION_UNKNOWN);
                         shift = (insn >> 7) & 0x1f;
                         i = (insn >> 16) & 0x1f;
                         i = i + 1 - shift;
@@ -6664,6 +7230,7 @@
                     case 0x12: case 0x16: case 0x1a: case 0x1e: /* sbfx */
                     case 0x32: case 0x36: case 0x3a: case 0x3e: /* ubfx */
                         ARCH(6T2);
+                        instr_count_inc(ARM_INSTRUCTION_UNKNOWN);
                         tmp = load_reg(s, rm);
                         shift = (insn >> 7) & 0x1f;
                         i = ((insn >> 16) & 0x1f) + 1;
@@ -6705,17 +7272,23 @@
             if (insn & (1 << 20)) {
                 /* load */
                 if (insn & (1 << 22)) {
+                    instr_count_inc(ARM_INSTRUCTION_LDRB);
                     tmp = gen_ld8u(tmp2, i);
                 } else {
+                    instr_count_inc(ARM_INSTRUCTION_LDR);
                     tmp = gen_ld32(tmp2, i);
                 }
             } else {
                 /* store */
                 tmp = load_reg(s, rd);
-                if (insn & (1 << 22))
+                if (insn & (1 << 22)) {
+                    instr_count_inc(ARM_INSTRUCTION_STRB);
                     gen_st8(tmp, tmp2, i);
-                else
+                }
+                else {
+                    instr_count_inc(ARM_INSTRUCTION_STR);
                     gen_st32(tmp, tmp2, i);
+                }
             }
             if (!(insn & (1 << 24))) {
                 gen_add_data_offset(s, insn, tmp2);
@@ -6740,6 +7313,17 @@
                 TCGv loaded_var;
                 /* load/store multiple words */
                 /* XXX: store correct base if write back */
+                switch ((insn & 0x00500000) >> 20) { /* bit 22 -> 0x4, bit 20 (load) -> 0x1; parenthesized because >> binds tighter than & */
+                case 0x0: instr_count_inc(ARM_INSTRUCTION_STM1); break;
+                case 0x1: instr_count_inc(ARM_INSTRUCTION_LDM1); break;
+                case 0x4: instr_count_inc(ARM_INSTRUCTION_STM2); break;
+                case 0x5: /* bit 15 of the register list (PC) selects LDM3 over LDM2 */
+                    if (insn & (1 << 15))
+                        instr_count_inc(ARM_INSTRUCTION_LDM3);
+                    else
+                        instr_count_inc(ARM_INSTRUCTION_LDM2);
+                    break;
+                }
                 user = 0;
                 if (insn & (1 << 22)) {
                     if (IS_USER(s))
@@ -6854,14 +7438,15 @@
         case 0xb:
             {
                 int32_t offset;
-
                 /* branch (and link) */
                 val = (int32_t)s->pc;
                 if (insn & (1 << 24)) {
+                	instr_count_inc(ARM_INSTRUCTION_B);
                     tmp = new_tmp();
                     tcg_gen_movi_i32(tmp, val);
                     store_reg(s, 14, tmp);
                 }
+                else instr_count_inc(ARM_INSTRUCTION_B);
                 offset = (((int32_t)insn << 8) >> 8);
                 val += (offset << 2) + 4;
                 gen_jmp(s, val);
@@ -6871,11 +7456,13 @@
         case 0xd:
         case 0xe:
             /* Coprocessor.  */
+        	instr_count_inc(ARM_INSTRUCTION_COPROCESSOR);
             if (disas_coproc_insn(env, s, insn))
                 goto illegal_op;
             break;
         case 0xf:
             /* swi */
+        	instr_count_inc(ARM_INSTRUCTION_SWI);
             gen_set_pc_im(s->pc);
             s->is_jmp = DISAS_SWI;
             break;
@@ -6888,6 +7475,11 @@
             break;
         }
     }
+/*
+    if (*load_offset == offsetof(CPUState, instr_count[ARM_INSTRUCTION_NOT_INSTRUMENTED])) {
+    	fprintf(stderr, "Unknown instruction: %x\n", insn);
+    }
+*/
 }
 
 /* Return true if this is a Thumb-2 logical op.  */
@@ -7995,13 +8587,19 @@
 
 static void disas_thumb_insn(CPUState *env, DisasContext *s)
 {
-    uint32_t val, insn, op, rm, rn, rd, shift, cond;
+    uint32_t val, insn, op, rm, rn, rd, shift, cond, instr_index;
     int32_t offset;
     int i;
     TCGv tmp;
     TCGv tmp2;
     TCGv addr;
 
+    instr_index = 0;
+
+    instr_count_inc_init(ARM_THUMB_INSTRUCTION_COUNTER_OFFSET,
+    		ARM_THUMB_INSTRUCTION_NOT_INSTRUMENTED);
+
+
     if (s->condexec_mask) {
         cond = s->condexec_cond;
         s->condlabel = gen_new_label();
@@ -8022,18 +8620,25 @@
             gen_movl_T0_reg(s, rn);
             if (insn & (1 << 10)) {
                 /* immediate */
+            	instr_index = 0;
                 gen_op_movl_T1_im((insn >> 6) & 7);
             } else {
                 /* reg */
+            	instr_index = 2; /* ADD3 / SUB3  */
                 rm = (insn >> 6) & 7;
                 gen_movl_T1_reg(s, rm);
             }
             if (insn & (1 << 9)) {
+            	instr_count_inc(ARM_THUMB_INSTRUCTION_SUB1 + instr_index);
                 if (s->condexec_mask)
                     gen_op_subl_T0_T1();
                 else
                     gen_op_subl_T0_T1_cc();
             } else {
+                /* MOV (2) is only the immediate form with imm3 == 0; in the register form bits [8:6] are rm, so rm == r0 is still ADD (3) */
+                if ((insn & (1 << 10)) && !((insn >> 6) & 7)) {
+                    instr_count_inc(ARM_THUMB_INSTRUCTION_MOV2);
+                } else instr_count_inc(ARM_THUMB_INSTRUCTION_ADD1 + instr_index);
                 if (s->condexec_mask)
                     gen_op_addl_T0_T1();
                 else
@@ -8042,6 +8647,17 @@
             gen_movl_reg_T0(s, rd);
         } else {
             /* shift immediate */
+        	switch (op) {
+        	case 0x0:
+        		instr_count_inc(ARM_THUMB_INSTRUCTION_LSL1);
+        		break;
+        	case 0x1:
+        		instr_count_inc(ARM_THUMB_INSTRUCTION_LSR1);
+				break;
+        	case 0x2:
+        		instr_count_inc(ARM_THUMB_INSTRUCTION_ASR1);
+        		break;
+        	}
             rm = (insn >> 3) & 7;
             shift = (insn >> 6) & 0x1f;
             tmp = load_reg(s, rm);
@@ -8063,19 +8679,23 @@
         }
         switch (op) {
         case 0: /* mov */
+        	instr_count_inc(ARM_THUMB_INSTRUCTION_MOV1);
             if (!s->condexec_mask)
                 gen_op_logic_T0_cc();
             break;
         case 1: /* cmp */
+        	instr_count_inc(ARM_THUMB_INSTRUCTION_CMP1);
             gen_op_subl_T0_T1_cc();
             break;
         case 2: /* add */
+        	instr_count_inc(ARM_THUMB_INSTRUCTION_ADD2);
             if (s->condexec_mask)
                 gen_op_addl_T0_T1();
             else
                 gen_op_addl_T0_T1_cc();
             break;
         case 3: /* sub */
+        	instr_count_inc(ARM_THUMB_INSTRUCTION_SUB2);
             if (s->condexec_mask)
                 gen_op_subl_T0_T1();
             else
@@ -8087,6 +8707,7 @@
         break;
     case 4:
         if (insn & (1 << 11)) {
+        	instr_count_inc(ARM_THUMB_INSTRUCTION_LDR3);
             rd = (insn >> 8) & 7;
             /* load pc-relative.  Bit 1 of PC is ignored.  */
             val = s->pc + 2 + ((insn & 0xff) * 4);
@@ -8105,23 +8726,28 @@
             op = (insn >> 8) & 3;
             switch (op) {
             case 0: /* add */
+            	instr_count_inc(ARM_THUMB_INSTRUCTION_ADD4);
                 gen_movl_T0_reg(s, rd);
                 gen_movl_T1_reg(s, rm);
                 gen_op_addl_T0_T1();
                 gen_movl_reg_T0(s, rd);
                 break;
             case 1: /* cmp */
+            	instr_count_inc(ARM_THUMB_INSTRUCTION_CMP3);
                 gen_movl_T0_reg(s, rd);
                 gen_movl_T1_reg(s, rm);
                 gen_op_subl_T0_T1_cc();
                 break;
             case 2: /* mov/cpy */
+            	instr_count_inc(ARM_THUMB_INSTRUCTION_MOV3);
                 gen_movl_T0_reg(s, rm);
                 gen_movl_reg_T0(s, rd);
                 break;
             case 3:/* branch [and link] exchange thumb register */
+            	instr_count_inc(ARM_THUMB_INSTRUCTION_BX);
                 tmp = load_reg(s, rm);
                 if (insn & (1 << 7)) {
+                	instr_count_inc(ARM_THUMB_INSTRUCTION_BLX2);
                     val = (uint32_t)s->pc | 1;
                     tmp2 = new_tmp();
                     tcg_gen_movi_i32(tmp2, val);
@@ -8155,16 +8781,19 @@
         gen_movl_T1_reg(s, rm);
         switch (op) {
         case 0x0: /* and */
+        	instr_count_inc(ARM_THUMB_INSTRUCTION_AND);
             gen_op_andl_T0_T1();
             if (!s->condexec_mask)
                 gen_op_logic_T0_cc();
             break;
         case 0x1: /* eor */
+        	instr_count_inc(ARM_THUMB_INSTRUCTION_EOR);
             gen_op_xorl_T0_T1();
             if (!s->condexec_mask)
                 gen_op_logic_T0_cc();
             break;
         case 0x2: /* lsl */
+        	instr_count_inc(ARM_THUMB_INSTRUCTION_LSL2);
             if (s->condexec_mask) {
                 gen_helper_shl(cpu_T[1], cpu_T[1], cpu_T[0]);
             } else {
@@ -8173,6 +8802,7 @@
             }
             break;
         case 0x3: /* lsr */
+        	instr_count_inc(ARM_THUMB_INSTRUCTION_LSR2);
             if (s->condexec_mask) {
                 gen_helper_shr(cpu_T[1], cpu_T[1], cpu_T[0]);
             } else {
@@ -8181,6 +8811,7 @@
             }
             break;
         case 0x4: /* asr */
+        	instr_count_inc(ARM_THUMB_INSTRUCTION_ASR2);
             if (s->condexec_mask) {
                 gen_helper_sar(cpu_T[1], cpu_T[1], cpu_T[0]);
             } else {
@@ -8189,19 +8820,22 @@
             }
             break;
         case 0x5: /* adc */
-            if (s->condexec_mask)
+        	instr_count_inc(ARM_THUMB_INSTRUCTION_ADC);
+        	if (s->condexec_mask)
                 gen_adc_T0_T1();
             else
                 gen_op_adcl_T0_T1_cc();
             break;
         case 0x6: /* sbc */
+        	instr_count_inc(ARM_THUMB_INSTRUCTION_SBC);
             if (s->condexec_mask)
                 gen_sbc_T0_T1();
             else
                 gen_op_sbcl_T0_T1_cc();
             break;
         case 0x7: /* ror */
-            if (s->condexec_mask) {
+        	instr_count_inc(ARM_THUMB_INSTRUCTION_ROR);
+        	if (s->condexec_mask) {
                 gen_helper_ror(cpu_T[1], cpu_T[1], cpu_T[0]);
             } else {
                 gen_helper_ror_cc(cpu_T[1], cpu_T[1], cpu_T[0]);
@@ -8209,40 +8843,48 @@
             }
             break;
         case 0x8: /* tst */
+        	instr_count_inc(ARM_THUMB_INSTRUCTION_TST);
             gen_op_andl_T0_T1();
             gen_op_logic_T0_cc();
             rd = 16;
             break;
         case 0x9: /* neg */
+        	instr_count_inc(ARM_THUMB_INSTRUCTION_NEG);
             if (s->condexec_mask)
                 tcg_gen_neg_i32(cpu_T[0], cpu_T[1]);
             else
                 gen_op_subl_T0_T1_cc();
             break;
         case 0xa: /* cmp */
+        	instr_count_inc(ARM_THUMB_INSTRUCTION_CMP2);
             gen_op_subl_T0_T1_cc();
             rd = 16;
             break;
         case 0xb: /* cmn */
+        	instr_count_inc(ARM_THUMB_INSTRUCTION_CMN);
             gen_op_addl_T0_T1_cc();
             rd = 16;
             break;
         case 0xc: /* orr */
+        	instr_count_inc(ARM_THUMB_INSTRUCTION_ORR);
             gen_op_orl_T0_T1();
             if (!s->condexec_mask)
                 gen_op_logic_T0_cc();
             break;
         case 0xd: /* mul */
+        	instr_count_inc(ARM_THUMB_INSTRUCTION_MUL);
             gen_op_mull_T0_T1();
             if (!s->condexec_mask)
                 gen_op_logic_T0_cc();
             break;
         case 0xe: /* bic */
+        	instr_count_inc(ARM_THUMB_INSTRUCTION_BIC);
             gen_op_bicl_T0_T1();
             if (!s->condexec_mask)
                 gen_op_logic_T0_cc();
             break;
         case 0xf: /* mvn */
+        	instr_count_inc(ARM_THUMB_INSTRUCTION_MVN);
             gen_op_notl_T1();
             if (!s->condexec_mask)
                 gen_op_logic_T1_cc();
@@ -8274,27 +8916,35 @@
 
         switch (op) {
         case 0: /* str */
+        	instr_count_inc(ARM_THUMB_INSTRUCTION_STR2);
             gen_st32(tmp, addr, IS_USER(s));
             break;
         case 1: /* strh */
+        	instr_count_inc(ARM_THUMB_INSTRUCTION_STRH2);
             gen_st16(tmp, addr, IS_USER(s));
             break;
         case 2: /* strb */
+        	instr_count_inc(ARM_THUMB_INSTRUCTION_STRB2);
             gen_st8(tmp, addr, IS_USER(s));
             break;
         case 3: /* ldrsb */
+        	instr_count_inc(ARM_THUMB_INSTRUCTION_LDRSB);
             tmp = gen_ld8s(addr, IS_USER(s));
             break;
         case 4: /* ldr */
+        	instr_count_inc(ARM_THUMB_INSTRUCTION_LDR2);
             tmp = gen_ld32(addr, IS_USER(s));
             break;
         case 5: /* ldrh */
+        	instr_count_inc(ARM_THUMB_INSTRUCTION_LDRH2);
             tmp = gen_ld16u(addr, IS_USER(s));
             break;
         case 6: /* ldrb */
+        	instr_count_inc(ARM_THUMB_INSTRUCTION_LDRB2);
             tmp = gen_ld8u(addr, IS_USER(s));
             break;
         case 7: /* ldrsh */
+        	instr_count_inc(ARM_THUMB_INSTRUCTION_LDRSH);
             tmp = gen_ld16s(addr, IS_USER(s));
             break;
         }
@@ -8313,10 +8963,12 @@
 
         if (insn & (1 << 11)) {
             /* load */
+        	instr_count_inc(ARM_THUMB_INSTRUCTION_LDR1);
             tmp = gen_ld32(addr, IS_USER(s));
             store_reg(s, rd, tmp);
         } else {
             /* store */
+        	instr_count_inc(ARM_THUMB_INSTRUCTION_STR1);
             tmp = load_reg(s, rd);
             gen_st32(tmp, addr, IS_USER(s));
         }
@@ -8333,10 +8985,12 @@
 
         if (insn & (1 << 11)) {
             /* load */
+        	instr_count_inc(ARM_THUMB_INSTRUCTION_LDRB1);
             tmp = gen_ld8u(addr, IS_USER(s));
             store_reg(s, rd, tmp);
         } else {
             /* store */
+        	instr_count_inc(ARM_THUMB_INSTRUCTION_STRB1);
             tmp = load_reg(s, rd);
             gen_st8(tmp, addr, IS_USER(s));
         }
@@ -8353,10 +9007,12 @@
 
         if (insn & (1 << 11)) {
             /* load */
+        	instr_count_inc(ARM_THUMB_INSTRUCTION_LDRH1);
             tmp = gen_ld16u(addr, IS_USER(s));
             store_reg(s, rd, tmp);
         } else {
             /* store */
+        	instr_count_inc(ARM_THUMB_INSTRUCTION_STRH1);
             tmp = load_reg(s, rd);
             gen_st16(tmp, addr, IS_USER(s));
         }
@@ -8372,10 +9028,12 @@
 
         if (insn & (1 << 11)) {
             /* load */
+        	instr_count_inc(ARM_THUMB_INSTRUCTION_LDR4);
             tmp = gen_ld32(addr, IS_USER(s));
             store_reg(s, rd, tmp);
         } else {
             /* store */
+        	instr_count_inc(ARM_THUMB_INSTRUCTION_STR3);
             tmp = load_reg(s, rd);
             gen_st32(tmp, addr, IS_USER(s));
         }
@@ -8387,9 +9045,11 @@
         rd = (insn >> 8) & 7;
         if (insn & (1 << 11)) {
             /* SP */
+        	instr_count_inc(ARM_THUMB_INSTRUCTION_ADD6);
             tmp = load_reg(s, 13);
         } else {
             /* PC. bit 1 is ignored.  */
+        	instr_count_inc(ARM_THUMB_INSTRUCTION_ADD5);
             tmp = new_tmp();
             tcg_gen_movi_i32(tmp, (s->pc + 2) & ~(uint32_t)2);
         }
@@ -8406,8 +9066,11 @@
             /* adjust stack pointer */
             tmp = load_reg(s, 13);
             val = (insn & 0x7f) * 4;
-            if (insn & (1 << 7))
+            if (insn & (1 << 7)) {
+                instr_count_inc(ARM_THUMB_INSTRUCTION_SUB4); /* bit 7 set: the offset is negated below, i.e. SUB SP, #imm */
                 val = -(int32_t)val;
+            }
+            else instr_count_inc(ARM_THUMB_INSTRUCTION_ADD7); /* bit 7 clear: ADD SP, #imm */
             tcg_gen_addi_i32(tmp, tmp, val);
             store_reg(s, 13, tmp);
             break;
@@ -8418,10 +9081,22 @@
             rm = (insn >> 3) & 7;
             tmp = load_reg(s, rm);
             switch ((insn >> 6) & 3) {
-            case 0: gen_sxth(tmp); break;
-            case 1: gen_sxtb(tmp); break;
-            case 2: gen_uxth(tmp); break;
-            case 3: gen_uxtb(tmp); break;
+            case 0:
+            	instr_count_inc(ARM_THUMB_INSTRUCTION_SXTH);
+            	gen_sxth(tmp);
+            	break;
+            case 1:
+            	instr_count_inc(ARM_THUMB_INSTRUCTION_SXTB);
+            	gen_sxtb(tmp);
+            	break;
+            case 2:
+            	instr_count_inc(ARM_THUMB_INSTRUCTION_UXTH);
+            	gen_uxth(tmp);
+            	break;
+            case 3:
+            	instr_count_inc(ARM_THUMB_INSTRUCTION_UXTB);
+            	gen_uxtb(tmp);
+            	break;
             }
             store_reg(s, rd, tmp);
             break;
@@ -8443,10 +9118,12 @@
                 if (insn & (1 << i)) {
                     if (insn & (1 << 11)) {
                         /* pop */
+                    	instr_count_inc(ARM_THUMB_INSTRUCTION_POP);
                         tmp = gen_ld32(addr, IS_USER(s));
                         store_reg(s, i, tmp);
                     } else {
                         /* push */
+                    	instr_count_inc(ARM_THUMB_INSTRUCTION_PUSH);
                         tmp = load_reg(s, i);
                         gen_st32(tmp, addr, IS_USER(s));
                     }
@@ -8458,11 +9135,13 @@
             if (insn & (1 << 8)) {
                 if (insn & (1 << 11)) {
                     /* pop pc */
+                	instr_count_inc(ARM_THUMB_INSTRUCTION_POP);
                     tmp = gen_ld32(addr, IS_USER(s));
                     /* don't set the pc until the rest of the instruction
                        has completed */
                 } else {
                     /* push lr */
+                	instr_count_inc(ARM_THUMB_INSTRUCTION_PUSH);
                     tmp = load_reg(s, 14);
                     gen_st32(tmp, addr, IS_USER(s));
                 }
@@ -8506,6 +9185,7 @@
             break;
 
         case 0xe: /* bkpt */
+        	instr_count_inc(ARM_THUMB_INSTRUCTION_BKPT);
             gen_set_condexec(s);
             gen_set_pc_im(s->pc - 2);
             gen_exception(EXCP_BKPT);
@@ -8518,15 +9198,25 @@
             rd = insn & 0x7;
             tmp = load_reg(s, rn);
             switch ((insn >> 6) & 3) {
-            case 0: tcg_gen_bswap_i32(tmp, tmp); break;
-            case 1: gen_rev16(tmp); break;
-            case 3: gen_revsh(tmp); break;
+            case 0:
+            	instr_count_inc(ARM_THUMB_INSTRUCTION_REV);
+            	tcg_gen_bswap_i32(tmp, tmp);
+            	break;
+            case 1:
+            	instr_count_inc(ARM_THUMB_INSTRUCTION_REV16);
+            	gen_rev16(tmp);
+            	break;
+            case 3:
+            	instr_count_inc(ARM_THUMB_INSTRUCTION_REVSH);
+            	gen_revsh(tmp);
+            	break;
             default: goto illegal_op;
             }
             store_reg(s, rd, tmp);
             break;
 
         case 6: /* cps */
+        	instr_count_inc(ARM_THUMB_INSTRUCTION_CPS);
             ARCH(6);
             if (IS_USER(s))
                 break;
@@ -8568,10 +9258,12 @@
             if (insn & (1 << i)) {
                 if (insn & (1 << 11)) {
                     /* load */
+                	instr_count_inc(ARM_THUMB_INSTRUCTION_LDMIA);
                     tmp = gen_ld32(addr, IS_USER(s));
                     store_reg(s, i, tmp);
                 } else {
                     /* store */
+                	instr_count_inc(ARM_THUMB_INSTRUCTION_STMIA);
                     tmp = load_reg(s, i);
                     gen_st32(tmp, addr, IS_USER(s));
                 }
@@ -8595,6 +9287,7 @@
 
         if (cond == 0xf) {
             /* swi */
+        	instr_count_inc(ARM_THUMB_INSTRUCTION_SWI);
             gen_set_condexec(s);
             gen_set_pc_im(s->pc);
             s->is_jmp = DISAS_SWI;
@@ -8607,6 +9300,7 @@
         gen_movl_T1_reg(s, 15);
 
         /* jump to the offset */
+        instr_count_inc(ARM_THUMB_INSTRUCTION_B1);
         val = (uint32_t)s->pc + 2;
         offset = ((int32_t)insn << 24) >> 24;
         val += offset << 1;
@@ -8615,11 +9309,13 @@
 
     case 14:
         if (insn & (1 << 11)) {
+        	instr_count_inc(ARM_THUMB_INSTRUCTION_BLX1);
             if (disas_thumb2_insn(env, s, insn))
               goto undef32;
             break;
         }
         /* unconditional branch */
+        instr_count_inc(ARM_THUMB_INSTRUCTION_B2);
         val = (uint32_t)s->pc;
         offset = ((int32_t)insn << 21) >> 21;
         val += (offset << 1) + 2;
@@ -8627,6 +9323,8 @@
         break;
 
     case 15:
+    	if (insn & (1 << 11)) instr_count_inc(ARM_THUMB_INSTRUCTION_BL);
+    	else instr_count_inc(ARM_THUMB_INSTRUCTION_UNKNOWN);
         if (disas_thumb2_insn(env, s, insn))
             goto undef32;
         break;

             reply	other threads:[~2009-05-19 15:00 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-05-19 14:59 Timo Töyry [this message]
2009-05-20 10:48 ` [Qemu-devel] Instruction counting instrumentation for ARM + initial patch Paul Brook
2009-05-20 12:16   ` Laurent Desnogues
2009-05-20 20:35     ` Vince Weaver
2009-05-23 13:23       ` Laurent Desnogues
2009-05-25 15:04         ` Sami Kiminki

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1242745197.24234.7.camel@peak10.cs.hut.fi \
    --to=ttoyry@cs.hut.fi \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).