linux-kernel.vger.kernel.org archive mirror
From: Vikas Shivappa <vikas.shivappa@linux.intel.com>
To: vikas.shivappa@intel.com, vikas.shivappa@linux.intel.com
Cc: linux-kernel@vger.kernel.org, x86@kernel.org, hpa@zytor.com,
	tglx@linutronix.de, mingo@kernel.org, peterz@infradead.org,
	ravi.v.shankar@intel.com, tony.luck@intel.com,
	fenghua.yu@intel.com, h.peter.anvin@intel.com
Subject: [PATCH 6/6] x86/mbm: Add support for MBM counter overflow handling
Date: Tue,  1 Mar 2016 15:48:28 -0800
Message-ID: <1456876108-28770-7-git-send-email-vikas.shivappa@linux.intel.com>
In-Reply-To: <1456876108-28770-1-git-send-email-vikas.shivappa@linux.intel.com>

This patch adds a per-package timer which periodically updates the
memory bandwidth counters for the events that are currently active.
The timer fires every 1s, since the SDM guarantees that the counters
will not overflow within 1s, but this interval can certainly be
improved by calibrating it on the system: the overflow time is really
a function of the maximum memory bandwidth the socket can support,
the maximum counter value and the scaling factor.
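
For illustration, a back-of-the-envelope version of that calculation
(a user-space sketch, not part of this patch; the scaling factor and
the peak-bandwidth figure are assumed values -- in practice they would
come from CPUID and calibration -- only the 24-bit counter width
matches MBM_CNTR_MAX below):

	#include <stdio.h>

	#define MBM_CNTR_MAX	0xffffffULL	/* 24-bit MBM counter, as in the patch */

	int main(void)
	{
		/* assumed values -- read from CPUID / measured in practice */
		unsigned long long scale  = 98304;		/* bytes per counter increment */
		unsigned long long max_bw = 68ULL << 30;	/* 68 GiB/s peak socket bandwidth */

		/* time to overflow (ms) = max count * scale / bandwidth */
		unsigned long long ms = MBM_CNTR_MAX * scale * 1000 / max_bw;

		/* with the values above this prints ~22588 ms, i.e. the 1s
		 * default is conservative on such a system */
		printf("guaranteed no overflow for %llu ms\n", ms);
		return 0;
	}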

Reviewed-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Vikas Shivappa <vikas.shivappa@linux.intel.com>
---
 arch/x86/kernel/cpu/perf_event_intel_cqm.c | 132 ++++++++++++++++++++++++++++-
 1 file changed, 130 insertions(+), 2 deletions(-)
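
The timer code below follows the stock kernel hrtimer pattern; for
reference, a stripped-down sketch of that pattern (a standalone module
for illustration, not the patch code -- the names, handler body and
interval are placeholders):

	#include <linux/module.h>
	#include <linux/hrtimer.h>
	#include <linux/ktime.h>

	static struct hrtimer demo_timer;

	/* Runs in hard-irq context; re-arm and RESTART to make it periodic. */
	static enum hrtimer_restart demo_handle(struct hrtimer *t)
	{
		/* ... fold the hardware counters into the stored samples ... */
		hrtimer_forward_now(t, ms_to_ktime(1000));
		return HRTIMER_RESTART;
	}

	static int __init demo_init(void)
	{
		hrtimer_init(&demo_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
		demo_timer.function = demo_handle;
		/* _PINNED keeps the timer on the CPU that armed it */
		hrtimer_start(&demo_timer, ms_to_ktime(1000),
			      HRTIMER_MODE_REL_PINNED);
		return 0;
	}

	static void __exit demo_exit(void)
	{
		hrtimer_cancel(&demo_timer);
	}

	module_init(demo_init);
	module_exit(demo_exit);
	MODULE_LICENSE("GPL");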

diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
index fa5ec85..2870fc7 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
@@ -18,9 +18,15 @@
  * value
  */
 #define MBM_CNTR_MAX		0xffffff
+/*
+ * Time in ms, per the SDM, for which MBM counters will not overflow.
+ */
+#define MBM_CTR_OVERFLOW_TIME	1000
+
 static u32 cqm_max_rmid = -1;
 static unsigned int cqm_l3_scale; /* supposedly cacheline size */
 static bool cqm_enabled, mbm_enabled;
+unsigned int mbm_socket_max;
 
 /**
  * struct intel_pqr_state - State cache for the PQR MSR
@@ -48,6 +54,7 @@ struct intel_pqr_state {
  * interrupts disabled, which is sufficient for the protection.
  */
 static DEFINE_PER_CPU(struct intel_pqr_state, pqr_state);
+static struct hrtimer *mbm_timers;
 /**
  * struct sample - mbm event's (local or total) data
  * @interval_start Time this interval began
@@ -1122,6 +1129,84 @@ static void __intel_mbm_event_count(void *info)
 	atomic64_add(val, &rr->value);
 }
 
+static enum hrtimer_restart mbm_hrtimer_handle(struct hrtimer *hrtimer)
+{
+	struct perf_event *iter, *iter1;
+	int ret = HRTIMER_RESTART;
+	struct list_head *head;
+	unsigned long flags;
+	u32 grp_rmid;
+
+	/*
+	 * Need to take the cache_lock as the timer Event Select MSR reads
+	 * can race with the mbm/cqm count() and mbm_init() reads.
+	 */
+	raw_spin_lock_irqsave(&cache_lock, flags);
+
+	if (list_empty(&cache_groups)) {
+		ret = HRTIMER_NORESTART;
+		goto out;
+	}
+
+	list_for_each_entry(iter, &cache_groups, hw.cqm_groups_entry) {
+		grp_rmid = iter->hw.cqm_rmid;
+		if (!__rmid_valid(grp_rmid))
+			continue;
+		if (is_mbm_event(iter->attr.config))
+			update_sample(grp_rmid, iter->attr.config, 0);
+
+		head = &iter->hw.cqm_group_entry;
+		if (list_empty(head))
+			continue;
+		list_for_each_entry(iter1, head, hw.cqm_group_entry) {
+			if (!iter1->hw.is_group_event)
+				break;
+			if (is_mbm_event(iter1->attr.config))
+				update_sample(iter1->hw.cqm_rmid,
+					      iter1->attr.config, 0);
+		}
+	}
+
+	hrtimer_forward_now(hrtimer, ms_to_ktime(MBM_CTR_OVERFLOW_TIME));
+out:
+	raw_spin_unlock_irqrestore(&cache_lock, flags);
+
+	return ret;
+}
+
+static void __mbm_start_timer(void *info)
+{
+	hrtimer_start(&mbm_timers[pkg_id], ms_to_ktime(MBM_CTR_OVERFLOW_TIME),
+			     HRTIMER_MODE_REL_PINNED);
+}
+
+static void __mbm_stop_timer(void *info)
+{
+	hrtimer_cancel(&mbm_timers[pkg_id]);
+}
+
+static void mbm_start_timers(void)
+{
+	on_each_cpu_mask(&cqm_cpumask, __mbm_start_timer, NULL, 1);
+}
+
+static void mbm_stop_timers(void)
+{
+	on_each_cpu_mask(&cqm_cpumask, __mbm_stop_timer, NULL, 1);
+}
+
+static void mbm_hrtimer_init(void)
+{
+	struct hrtimer *hr;
+	int i;
+
+	for (i = 0; i < mbm_socket_max; i++) {
+		hr = &mbm_timers[i];
+		hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+		hr->function = mbm_hrtimer_handle;
+	}
+}
+
 static u64 intel_cqm_event_count(struct perf_event *event)
 {
 	unsigned long flags;
@@ -1251,8 +1336,14 @@ static int intel_cqm_event_add(struct perf_event *event, int mode)
 static void intel_cqm_event_destroy(struct perf_event *event)
 {
 	struct perf_event *group_other = NULL;
+	unsigned long flags;
 
 	mutex_lock(&cache_mutex);
+	/*
+	 * Hold the cache_lock as mbm timer handlers could be
+	 * scanning the list of events.
+	 */
+	raw_spin_lock_irqsave(&cache_lock, flags);
 
 	/*
 	 * If there's another event in this group...
@@ -1284,6 +1375,14 @@ static void intel_cqm_event_destroy(struct perf_event *event)
 		}
 	}
 
+	raw_spin_unlock_irqrestore(&cache_lock, flags);
+
+	/*
+	 * Stop the mbm overflow timers when the last event is destroyed.
+	 */
+	if (mbm_enabled && list_empty(&cache_groups))
+		mbm_stop_timers();
+
 	mutex_unlock(&cache_mutex);
 }
 
@@ -1291,6 +1390,7 @@ static int intel_cqm_event_init(struct perf_event *event)
 {
 	struct perf_event *group = NULL;
 	bool rotate = false;
+	unsigned long flags;
 
 	if (event->attr.type != intel_cqm_pmu.type)
 		return -ENOENT;
@@ -1316,9 +1416,21 @@ static int intel_cqm_event_init(struct perf_event *event)
 
 	mutex_lock(&cache_mutex);
 
+	/*
+	 * Start the mbm overflow timers when the first event is created.
+	 */
+	if (mbm_enabled && list_empty(&cache_groups))
+		mbm_start_timers();
+
 	/* Will also set rmid */
 	intel_cqm_setup_event(event, &group);
 
+	/*
+	 * Hold the cache_lock as mbm timer handlers could be
+	 * scanning the list of events.
+	 */
+	raw_spin_lock_irqsave(&cache_lock, flags);
+
 	if (group) {
 		list_add_tail(&event->hw.cqm_group_entry,
 			      &group->hw.cqm_group_entry);
@@ -1337,6 +1449,7 @@ static int intel_cqm_event_init(struct perf_event *event)
 			rotate = true;
 	}
 
+	raw_spin_unlock_irqrestore(&cache_lock, flags);
 	mutex_unlock(&cache_mutex);
 
 	if (rotate)
@@ -1604,15 +1717,30 @@ static int intel_mbm_init(void)
 {
 	int ret = 0, array_size, maxid = cqm_max_rmid + 1;
 
-	array_size = sizeof(struct sample) * maxid * topology_max_packages();
+	mbm_socket_max = topology_max_packages();
+	array_size = sizeof(struct sample) * maxid * mbm_socket_max;
 	mbm_local = kmalloc(array_size, GFP_KERNEL);
 	if (!mbm_local)
 		return -ENOMEM;
 
 	mbm_total = kmalloc(array_size, GFP_KERNEL);
 	if (!mbm_total) {
-		kfree(mbm_local);
 		ret = -ENOMEM;
+		goto out;
+	}
+
+	array_size = sizeof(struct hrtimer) * mbm_socket_max;
+	mbm_timers = kmalloc(array_size, GFP_KERNEL);
+	if (!mbm_timers) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	mbm_hrtimer_init();
+
+out:
+	if (ret) {
+		kfree(mbm_local);
+		kfree(mbm_total);
 	}
 
 	return ret;
-- 
1.9.1

Thread overview: 31+ messages
2016-03-01 23:48 [PATCH V5 0/6] Intel memory b/w monitoring support Vikas Shivappa
2016-03-01 23:48 ` [PATCH 1/6] x86,perf/cqm: Fix cqm handling of grouping events into a cache_group Vikas Shivappa
2016-03-07 23:04   ` Peter Zijlstra
2016-03-10  0:18     ` Vikas Shivappa
2016-03-01 23:48 ` [PATCH 2/6] x86,perf/cqm: Fix cqm memory leak and notifier leak Vikas Shivappa
2016-03-02  8:00   ` Thomas Gleixner
2016-03-02 17:58     ` Vikas Shivappa
2016-03-02 23:53   ` Vikas Shivappa
2016-03-08  9:22     ` Thomas Gleixner
2016-03-08 19:36       ` Vikas Shivappa
2016-03-01 23:48 ` [PATCH 3/6] x86/mbm: Intel Memory B/W Monitoring enumeration and init Vikas Shivappa
2016-03-02  8:04   ` Thomas Gleixner
2016-03-02 17:59     ` Vikas Shivappa
2016-03-02 21:31       ` Vikas Shivappa
2016-03-02 23:56   ` Vikas Shivappa
2016-03-03  7:35     ` Thomas Gleixner
2016-03-03 18:26       ` Vikas Shivappa
2016-03-03 18:37         ` Thomas Gleixner
2016-03-08  9:25           ` Thomas Gleixner
2016-03-08 19:36             ` Vikas Shivappa
2016-03-01 23:48 ` [PATCH 4/6] x86/mbm: Memory bandwidth monitoring event management Vikas Shivappa
2016-03-07 23:03   ` Peter Zijlstra
2016-03-07 23:27     ` Luck, Tony
2016-03-08  8:49       ` Peter Zijlstra
2016-03-10 22:46         ` Vikas Shivappa
2016-03-10 22:49     ` Vikas Shivappa
2016-03-01 23:48 ` [PATCH 5/6] x86/mbm: RMID Recycling MBM changes Vikas Shivappa
2016-03-01 23:48 ` Vikas Shivappa [this message]
2016-03-02 23:58   ` [PATCH 6/6] x86/mbm: Add support for MBM counter overflow handling Vikas Shivappa
  -- strict thread matches above, loose matches on Subject: below --
2016-03-10 23:32 [PATCH V6 0/6] Intel memory b/w monitoring support Vikas Shivappa
2016-03-10 23:32 ` [PATCH 6/6] x86/mbm: Add support for MBM counter overflow handling Vikas Shivappa
2016-03-11 19:26   ` Tony Luck
