All of lore.kernel.org
 help / color / mirror / Atom feed
From: Stephane Eranian <eranian@google.com>
To: linux-kernel@vger.kernel.org
Cc: peterz@infradead.org, mingo@elte.hu, ak@linux.intel.com,
	acme@redhat.com, jolsa@redhat.com, ming.m.lin@intel.com
Subject: [Patch v1 03/10] perf: add generic memory sampling interface
Date: Mon, 29 Oct 2012 16:15:45 +0100	[thread overview]
Message-ID: <1351523752-4215-4-git-send-email-eranian@google.com> (raw)
In-Reply-To: <1351523752-4215-1-git-send-email-eranian@google.com>

This patch adds PERF_SAMPLE_COST and PERF_SAMPLE_DSRC.
The first collects a cost associated with the sampled
event. In case of memory access, the cost would be
the latency of the load, otherwise it defaults to
the sampling period.

PERF_SAMPLE_DSRC collects the data source, i.e., where
the data associated with the sampled instruction came
from. The information is stored in a perf_mem_dsrc
union. It contains opcode, memory level, TLB, snoop, and
lock information, subject to availability in hardware.

Signed-off-by: Stephane Eranian <eranian@google.com>
---
 include/linux/perf_event.h      |    4 +++
 include/uapi/linux/perf_event.h |   70 ++++++++++++++++++++++++++++++++++++++-
 kernel/events/core.c            |   12 +++++++
 3 files changed, 85 insertions(+), 1 deletion(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 484cfbc..a323ee2 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -579,6 +579,8 @@ struct perf_sample_data {
 		u32	reserved;
 	}				cpu_entry;
 	u64				period;
+	u64				cost;
+	union  perf_mem_dsrc		dsrc;
 	struct perf_callchain_entry	*callchain;
 	struct perf_raw_record		*raw;
 	struct perf_branch_stack	*br_stack;
@@ -597,6 +599,8 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
 	data->regs_user.abi = PERF_SAMPLE_REGS_ABI_NONE;
 	data->regs_user.regs = NULL;
 	data->stack_user_size = 0;
+	data->cost = period; /* by default */
+	data->dsrc.val = 0;
 }
 
 extern void perf_output_sample(struct perf_output_handle *handle,
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 4f63c05..2a3401b 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -132,8 +132,10 @@ enum perf_event_sample_format {
 	PERF_SAMPLE_BRANCH_STACK		= 1U << 11,
 	PERF_SAMPLE_REGS_USER			= 1U << 12,
 	PERF_SAMPLE_STACK_USER			= 1U << 13,
+	PERF_SAMPLE_COST			= 1U << 14,
+	PERF_SAMPLE_DSRC			= 1U << 15,
 
-	PERF_SAMPLE_MAX = 1U << 14,		/* non-ABI */
+	PERF_SAMPLE_MAX = 1U << 16,		/* non-ABI */
 };
 
 /*
@@ -587,6 +589,9 @@ enum perf_event_type {
 	 * 	{ u64			size;
 	 * 	  char			data[size];
 	 * 	  u64			dyn_size; } && PERF_SAMPLE_STACK_USER
+	 *
+	 *	{ u64			cost;  } && PERF_SAMPLE_COST
+	 *	{ u64			dsrc;  } && PERF_SAMPLE_DSRC
 	 * };
 	 */
 	PERF_RECORD_SAMPLE			= 9,
@@ -612,4 +617,67 @@ enum perf_callchain_context {
 #define PERF_FLAG_FD_OUTPUT		(1U << 1)
 #define PERF_FLAG_PID_CGROUP		(1U << 2) /* pid=cgroup id, per-cpu mode only */
 
+union perf_mem_dsrc {			/* data source of a sampled memory access */
+	__u64 val;			/* whole word; this is what PERF_SAMPLE_DSRC emits */
+	struct {
+		__u64   mem_op:5,	/* type of opcode, PERF_MEM_OP_* flags */
+			mem_lvl:14,	/* memory hierarchy level, PERF_MEM_LVL_* flags */
+			mem_snoop:5,	/* snoop mode, PERF_MEM_SNOOP_* flags */
+			mem_lock:2,	/* lock instr, PERF_MEM_LOCK_* flags */
+			mem_dtlb:7,	/* tlb access, PERF_MEM_TLB_* flags */
+			mem_rsvd:31;	/* remaining bits; no PERF_MEM_* flag maps here */
+	};
+};
+
+/* type of opcode (load/store/prefetch/code); flags for perf_mem_dsrc.mem_op */
+#define PERF_MEM_OP_NA		0x01 /* not available */
+#define PERF_MEM_OP_LOAD	0x02 /* load instruction */
+#define PERF_MEM_OP_STORE	0x04 /* store instruction */
+#define PERF_MEM_OP_PFETCH	0x08 /* prefetch */
+#define PERF_MEM_OP_EXEC	0x10 /* code (execution) */
+#define PERF_MEM_OP_SHIFT	0 /* left shift used by PERF_MEM_S(OP, ...) */
+
+/* memory hierarchy (memory level, hit or miss); flags for perf_mem_dsrc.mem_lvl */
+#define PERF_MEM_LVL_NA		0x01  /* not available */
+#define PERF_MEM_LVL_HIT	0x02  /* hit level */
+#define PERF_MEM_LVL_MISS	0x04  /* miss level */
+#define PERF_MEM_LVL_L1		0x08  /* L1 */
+#define PERF_MEM_LVL_LFB	0x10  /* Line Fill Buffer */
+#define PERF_MEM_LVL_L2		0x20  /* L2 (hit/miss is carried by HIT/MISS above) */
+#define PERF_MEM_LVL_L3		0x40  /* L3 (hit/miss is carried by HIT/MISS above) */
+#define PERF_MEM_LVL_LOC_RAM	0x80  /* Local DRAM */
+#define PERF_MEM_LVL_REM_RAM1	0x100 /* Remote DRAM (1 hop) */
+#define PERF_MEM_LVL_REM_RAM2	0x200 /* Remote DRAM (2 hops) */
+#define PERF_MEM_LVL_REM_CCE1	0x400 /* Remote Cache (1 hop) */
+#define PERF_MEM_LVL_REM_CCE2	0x800 /* Remote Cache (2 hops) */
+#define PERF_MEM_LVL_IO		0x1000 /* I/O memory */
+#define PERF_MEM_LVL_UNC	0x2000 /* Uncached memory */
+#define PERF_MEM_LVL_SHIFT	5 /* left shift used by PERF_MEM_S(LVL, ...) */
+
+/* snoop mode; flags for perf_mem_dsrc.mem_snoop */
+#define PERF_MEM_SNOOP_NA	0x01 /* not available */
+#define PERF_MEM_SNOOP_NONE	0x02 /* no snoop */
+#define PERF_MEM_SNOOP_HIT	0x04 /* snoop hit */
+#define PERF_MEM_SNOOP_MISS	0x08 /* snoop miss */
+#define PERF_MEM_SNOOP_HITM	0x10 /* snoop hit modified */
+#define PERF_MEM_SNOOP_SHIFT	19 /* left shift used by PERF_MEM_S(SNOOP, ...) */
+
+/* locked instruction; flags for perf_mem_dsrc.mem_lock */
+#define PERF_MEM_LOCK_NA	0x01 /* not available */
+#define PERF_MEM_LOCK_LOCKED	0x02 /* locked transaction */
+#define PERF_MEM_LOCK_SHIFT	24 /* left shift used by PERF_MEM_S(LOCK, ...) */
+
+/* TLB access; flags for perf_mem_dsrc.mem_dtlb (macros say TLB, field says dtlb) */
+#define PERF_MEM_TLB_NA		0x01 /* not available */
+#define PERF_MEM_TLB_HIT	0x02 /* hit level */
+#define PERF_MEM_TLB_MISS	0x04 /* miss level */
+#define PERF_MEM_TLB_L1		0x08 /* L1 */
+#define PERF_MEM_TLB_L2		0x10 /* L2 */
+#define PERF_MEM_TLB_WK		0x20 /* Hardware Walker */
+#define PERF_MEM_TLB_OS		0x40 /* OS fault handler */
+#define PERF_MEM_TLB_SHIFT	26 /* left shift used by PERF_MEM_S(TLB, ...) */
+
+#define PERF_MEM_S(a, s) /* PERF_MEM_<a>_<s> shifted by PERF_MEM_<a>_SHIFT; __u64, not u64: uapi header */ \
+	(((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
+
 #endif /* _UAPI_LINUX_PERF_EVENT_H */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index dbccf83..a1cf8f2 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -955,6 +955,12 @@ static void perf_event__header_size(struct perf_event *event)
 	if (sample_type & PERF_SAMPLE_READ)
 		size += event->read_size;
 
+	if (sample_type & PERF_SAMPLE_COST)
+		size += sizeof(data->cost);
+
+	if (sample_type & PERF_SAMPLE_DSRC)
+		size += sizeof(data->dsrc.val);
+
 	event->header_size = size;
 }
 
@@ -4169,6 +4175,12 @@ void perf_output_sample(struct perf_output_handle *handle,
 		perf_output_sample_ustack(handle,
 					  data->stack_user_size,
 					  data->regs_user.regs);
+
+	if (sample_type & PERF_SAMPLE_COST)
+		perf_output_put(handle, data->cost);
+
+	if (sample_type & PERF_SAMPLE_DSRC)
+		perf_output_put(handle, data->dsrc.val);
 }
 
 void perf_prepare_sample(struct perf_event_header *header,
-- 
1.7.9.5


  parent reply	other threads:[~2012-10-29 15:16 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-10-29 15:15 [Patch v1 00/10] perf: add memory access sampling support Stephane Eranian
2012-10-29 15:15 ` [Patch v1 01/10] perf/x86: improve sysfs event mapping with event string Stephane Eranian
2012-10-29 19:25   ` Andi Kleen
2012-10-29 15:15 ` [Patch v1 02/10] perf/x86: add flags to event constraints Stephane Eranian
2012-10-29 15:15 ` Stephane Eranian [this message]
2012-10-29 15:15 ` [Patch v1 04/10] perf/x86: add memory profiling via PEBS Load Latency Stephane Eranian
2012-10-29 15:23   ` Peter Zijlstra
2012-10-29 15:24     ` Stephane Eranian
2012-10-29 15:35   ` Peter Zijlstra
2012-10-29 15:39     ` Stephane Eranian
2012-10-29 15:38   ` Peter Zijlstra
2012-10-29 15:43     ` Stephane Eranian
2012-10-29 15:44       ` Peter Zijlstra
2012-10-29 19:42   ` Andi Kleen
2012-10-29 20:39     ` Stephane Eranian
2012-10-29 20:44       ` Peter Zijlstra
2012-10-29 21:16       ` Andi Kleen
2012-10-29 21:32         ` Stephane Eranian
2012-10-29 21:56           ` Andi Kleen
2012-10-30  8:43   ` Namhyung Kim
2012-10-29 15:15 ` [Patch v1 05/10] perf/x86: export PEBS load latency threshold register to sysfs Stephane Eranian
2012-10-29 15:15 ` [Patch v1 06/10] perf/x86: add support for PEBS Precise Store Stephane Eranian
2012-10-29 15:40   ` Peter Zijlstra
2012-10-29 15:44     ` Stephane Eranian
2012-10-31  5:21   ` Namhyung Kim
2012-10-31 13:28     ` Stephane Eranian
2012-10-29 15:15 ` [Patch v1 07/10] perf tools: add mem access sampling core support Stephane Eranian
2012-10-29 16:55   ` Andi Kleen
2012-10-29 17:00     ` Stephane Eranian
2012-10-31  5:51   ` Namhyung Kim
2012-10-31 13:30     ` Stephane Eranian
2012-10-29 15:15 ` [Patch v1 08/10] perf report: add support for mem access profiling Stephane Eranian
2012-10-31  6:01   ` Namhyung Kim
2012-10-29 15:15 ` [Patch v1 09/10] perf record: " Stephane Eranian
2012-10-29 15:15 ` [Patch v1 10/10] perf tools: add new mem command for memory " Stephane Eranian
2012-10-31  6:57   ` Namhyung Kim
2012-10-31 14:23     ` Stephane Eranian

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1351523752-4215-4-git-send-email-eranian@google.com \
    --to=eranian@google.com \
    --cc=acme@redhat.com \
    --cc=ak@linux.intel.com \
    --cc=jolsa@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=ming.m.lin@intel.com \
    --cc=mingo@elte.hu \
    --cc=peterz@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.