linux-rt-users.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Steven Rostedt <rostedt@goodmis.org>
To: linux-kernel@vger.kernel.org,
	linux-rt-users <linux-rt-users@vger.kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>,
	Carsten Emde <C.Emde@osadl.org>,
	Sebastian Andrzej Siewior <bigeasy@linutronix.de>,
	John Kacur <jkacur@redhat.com>,
	Clark Williams <clark.williams@gmail.com>
Subject: [PATCH RT 3/3] hwlat-detector: Use thread instead of stop machine
Date: Mon, 19 Aug 2013 17:33:27 -0400	[thread overview]
Message-ID: <20130819213545.295619792@goodmis.org> (raw)
In-Reply-To: 20130819213324.405942342@goodmis.org

[-- Attachment #1: hwlat-thread.patch --]
[-- Type: text/plain, Size: 6115 bytes --]

There's no reason to use stop machine to search for hardware latency.
Simply disabling interrupts while running the loop will do enough to
check if something comes in that wasn't disabled by interrupts being
off, which is exactly what stop machine does.

Instead of using stop machine, just have the thread disable interrupts
while it checks for hardware latency.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>


Index: linux-rt.git/drivers/misc/hwlat_detector.c
===================================================================
--- linux-rt.git.orig/drivers/misc/hwlat_detector.c
+++ linux-rt.git/drivers/misc/hwlat_detector.c
@@ -41,7 +41,6 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/ring_buffer.h>
-#include <linux/stop_machine.h>
 #include <linux/time.h>
 #include <linux/hrtimer.h>
 #include <linux/kthread.h>
@@ -107,7 +106,6 @@ struct data;					/* Global state */
 /* Sampling functions */
 static int __buffer_add_sample(struct sample *sample);
 static struct sample *buffer_get_sample(struct sample *sample);
-static int get_sample(void *unused);
 
 /* Threading and state */
 static int kthread_fn(void *unused);
@@ -149,7 +147,7 @@ struct sample {
 	unsigned long   lost;
 };
 
-/* keep the global state somewhere. Mostly used under stop_machine. */
+/* keep the global state somewhere. */
 static struct data {
 
 	struct mutex lock;		/* protect changes */
@@ -172,7 +170,7 @@ static struct data {
  * @sample: The new latency sample value
  *
  * This receives a new latency sample and records it in a global ring buffer.
- * No additional locking is used in this case - suited for stop_machine use.
+ * No additional locking is used in this case.
  */
 static int __buffer_add_sample(struct sample *sample)
 {
@@ -229,18 +227,17 @@ static struct sample *buffer_get_sample(
 #endif
 /**
  * get_sample - sample the CPU TSC and look for likely hardware latencies
- * @unused: This is not used but is a part of the stop_machine API
  *
  * Used to repeatedly capture the CPU TSC (or similar), looking for potential
- * hardware-induced latency. Called under stop_machine, with data.lock held.
+ * hardware-induced latency. Called with interrupts disabled and with data.lock held.
  */
-static int get_sample(void *unused)
+static int get_sample(void)
 {
 	time_type start, t1, t2, last_t2;
 	s64 diff, total = 0;
 	u64 sample = 0;
 	u64 outer_sample = 0;
-	int ret = 1;
+	int ret = -1;
 
 	init_time(last_t2, 0);
 	start = time_get(); /* start timestamp */
@@ -279,10 +276,14 @@ static int get_sample(void *unused)
 
 	} while (total <= data.sample_width);
 
+	ret = 0;
+
 	/* If we exceed the threshold value, we have found a hardware latency */
 	if (sample > data.threshold || outer_sample > data.threshold) {
 		struct sample s;
 
+		ret = 1;
+
 		data.count++;
 		s.seqnum = data.count;
 		s.duration = sample;
@@ -295,7 +296,6 @@ static int get_sample(void *unused)
 			data.max_sample = sample;
 	}
 
-	ret = 0;
 out:
 	return ret;
 }
@@ -305,32 +305,30 @@ out:
  * @unused: A required part of the kthread API.
  *
  * Used to periodically sample the CPU TSC via a call to get_sample. We
- * use stop_machine, whith does (intentionally) introduce latency since we
+ * disable interrupts, which does (intentionally) introduce latency since we
  * need to ensure nothing else might be running (and thus pre-empting).
  * Obviously this should never be used in production environments.
  *
- * stop_machine will schedule us typically only on CPU0 which is fine for
- * almost every real-world hardware latency situation - but we might later
- * generalize this if we find there are any actualy systems with alternate
- * SMI delivery or other non CPU0 hardware latencies.
+ * Currently this runs on which ever CPU it was scheduled on, but most
+ * real-worald hardware latency situations occur across several CPUs,
+ * but we might later generalize this if we find there are any actualy
+ * systems with alternate SMI delivery or other hardware latencies.
  */
 static int kthread_fn(void *unused)
 {
-	int err = 0;
-	u64 interval = 0;
+	int ret;
+	u64 interval;
 
 	while (!kthread_should_stop()) {
 
 		mutex_lock(&data.lock);
 
-		err = stop_machine(get_sample, unused, 0);
-		if (err) {
-			/* Houston, we have a problem */
-			mutex_unlock(&data.lock);
-			goto err_out;
-		}
+		local_irq_disable();
+		ret = get_sample();
+		local_irq_enable();
 
-		wake_up(&data.wq); /* wake up reader(s) */
+		if (ret > 0)
+			wake_up(&data.wq); /* wake up reader(s) */
 
 		interval = data.sample_window - data.sample_width;
 		do_div(interval, USEC_PER_MSEC); /* modifies interval value */
@@ -338,15 +336,10 @@ static int kthread_fn(void *unused)
 		mutex_unlock(&data.lock);
 
 		if (msleep_interruptible(interval))
-			goto out;
+			break;
 	}
-		goto out;
-err_out:
-	printk(KERN_ERR BANNER "could not call stop_machine, disabling\n");
-	enabled = 0;
-out:
-	return err;
 
+	return 0;
 }
 
 /**
@@ -442,8 +435,7 @@ out:
  * This function provides a generic read implementation for the global state
  * "data" structure debugfs filesystem entries. It would be nice to use
  * simple_attr_read directly, but we need to make sure that the data.lock
- * spinlock is held during the actual read (even though we likely won't ever
- * actually race here as the updater runs under a stop_machine context).
+ * is held during the actual read.
  */
 static ssize_t simple_data_read(struct file *filp, char __user *ubuf,
 				size_t cnt, loff_t *ppos, const u64 *entry)
@@ -478,8 +470,7 @@ static ssize_t simple_data_read(struct f
  * This function provides a generic write implementation for the global state
  * "data" structure debugfs filesystem entries. It would be nice to use
  * simple_attr_write directly, but we need to make sure that the data.lock
- * spinlock is held during the actual write (even though we likely won't ever
- * actually race here as the updater runs under a stop_machine context).
+ * is held during the actual write.
  */
 static ssize_t simple_data_write(struct file *filp, const char __user *ubuf,
 				 size_t cnt, loff_t *ppos, u64 *entry)

  parent reply	other threads:[~2013-08-19 21:33 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-08-19 21:33 [PATCH RT 0/3] hwlat-detector: Have it actually find hardware latency Steven Rostedt
2013-08-19 21:33 ` [PATCH RT 1/3] hwlat-detector: Update hwlat_detector to add outer loop detection Steven Rostedt
2013-08-19 21:33 ` [PATCH RT 2/3] hwlat-detector: Use trace_clock_local if available Steven Rostedt
2013-08-19 21:33 ` Steven Rostedt [this message]
2013-08-21 15:59 ` [PATCH RT 0/3] hwlat-detector: Have it actually find hardware latency Sebastian Andrzej Siewior
2013-08-30  5:57 ` [PATCH RT] hwlat-detector: Don't ignore threshold module parameter Mike Galbraith
2013-08-30 14:39   ` [PATCH RT] rt,ipc,sem: fix -rt livelock Mike Galbraith
2013-08-31  5:27     ` Mike Galbraith
2013-09-06  7:08       ` Mike Galbraith
2013-09-10  6:30         ` Mike Galbraith
2013-09-11 14:03           ` Manfred Spraul
2013-09-12  7:40             ` Mike Galbraith
2013-09-12 19:15               ` Steven Rostedt
2013-09-13  3:20                 ` Mike Galbraith
2013-09-13  3:33                   ` Steven Rostedt
2013-09-13  4:48                     ` Mike Galbraith
2013-09-13  4:36                   ` Mike Galbraith
2013-09-13 12:56                     ` Mike Galbraith
2013-09-12 18:23   ` [PATCH RT] hwlat-detector: Don't ignore threshold module parameter Steven Rostedt
2013-10-04 10:30   ` Sebastian Andrzej Siewior

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20130819213545.295619792@goodmis.org \
    --to=rostedt@goodmis.org \
    --cc=C.Emde@osadl.org \
    --cc=bigeasy@linutronix.de \
    --cc=clark.williams@gmail.com \
    --cc=jkacur@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-rt-users@vger.kernel.org \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).