1 From 62184f546d0f099d46a0a503bcc9b5e35c9e04e7 Mon Sep 17 00:00:00 2001
2 From: Steven Rostedt <rostedt@goodmis.org>
3 Date: Mon, 19 Aug 2013 17:33:27 -0400
4 Subject: [PATCH 027/366] hwlat-detector: Use thread instead of stop machine
6 There's no reason to use stop machine to search for hardware latency.
7 Simply disabling interrupts while running the loop is enough to
8 check if something comes in that wasn't blocked by interrupts being
9 off, which is exactly what stop machine does.
11 Instead of using stop machine, just have the thread disable interrupts
12 while it checks for hardware latency.
14 Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
15 Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
17 drivers/misc/hwlat_detector.c | 60 +++++++++++++++++++------------------------
18 1 file changed, 26 insertions(+), 34 deletions(-)
20 diff --git a/drivers/misc/hwlat_detector.c b/drivers/misc/hwlat_detector.c
21 index 0fcc0e3..6e88113 100644
22 --- a/drivers/misc/hwlat_detector.c
23 +++ b/drivers/misc/hwlat_detector.c
25 #include <linux/module.h>
26 #include <linux/init.h>
27 #include <linux/ring_buffer.h>
28 -#include <linux/stop_machine.h>
29 #include <linux/time.h>
30 #include <linux/hrtimer.h>
31 #include <linux/kthread.h>
32 @@ -107,7 +106,6 @@ struct data; /* Global state */
33 /* Sampling functions */
34 static int __buffer_add_sample(struct sample *sample);
35 static struct sample *buffer_get_sample(struct sample *sample);
36 -static int get_sample(void *unused);
38 /* Threading and state */
39 static int kthread_fn(void *unused);
40 @@ -149,7 +147,7 @@ struct sample {
44 -/* keep the global state somewhere. Mostly used under stop_machine. */
45 +/* keep the global state somewhere. */
48 struct mutex lock; /* protect changes */
49 @@ -172,7 +170,7 @@ static struct data {
50 * @sample: The new latency sample value
52 * This receives a new latency sample and records it in a global ring buffer.
53 - * No additional locking is used in this case - suited for stop_machine use.
54 + * No additional locking is used in this case.
56 static int __buffer_add_sample(struct sample *sample)
58 @@ -229,18 +227,18 @@ static struct sample *buffer_get_sample(struct sample *sample)
61 * get_sample - sample the CPU TSC and look for likely hardware latencies
62 - * @unused: This is not used but is a part of the stop_machine API
64 * Used to repeatedly capture the CPU TSC (or similar), looking for potential
65 - * hardware-induced latency. Called under stop_machine, with data.lock held.
66 + * hardware-induced latency. Called with interrupts disabled and with
69 -static int get_sample(void *unused)
70 +static int get_sample(void)
72 time_type start, t1, t2, last_t2;
79 init_time(last_t2, 0);
80 start = time_get(); /* start timestamp */
81 @@ -279,10 +277,14 @@ static int get_sample(void *unused)
83 } while (total <= data.sample_width);
87 /* If we exceed the threshold value, we have found a hardware latency */
88 if (sample > data.threshold || outer_sample > data.threshold) {
94 s.seqnum = data.count;
96 @@ -295,7 +297,6 @@ static int get_sample(void *unused)
97 data.max_sample = sample;
104 @@ -305,32 +306,30 @@ out:
105 * @unused: A required part of the kthread API.
107 * Used to periodically sample the CPU TSC via a call to get_sample. We
108 - * use stop_machine, whith does (intentionally) introduce latency since we
109 + * disable interrupts, which does (intentionally) introduce latency since we
110 * need to ensure nothing else might be running (and thus pre-empting).
111 * Obviously this should never be used in production environments.
113 - * stop_machine will schedule us typically only on CPU0 which is fine for
114 - * almost every real-world hardware latency situation - but we might later
115 - * generalize this if we find there are any actualy systems with alternate
116 - * SMI delivery or other non CPU0 hardware latencies.
117 + * Currently this runs on whichever CPU it was scheduled on, but most
118 + * real-world hardware latency situations occur across several CPUs,
119 + * though we might later generalize this if we find there are any actual
120 + * systems with alternate SMI delivery or other hardware latencies.
122 static int kthread_fn(void *unused)
129 while (!kthread_should_stop()) {
131 mutex_lock(&data.lock);
133 - err = stop_machine(get_sample, unused, 0);
135 - /* Houston, we have a problem */
136 - mutex_unlock(&data.lock);
139 + local_irq_disable();
140 + ret = get_sample();
141 + local_irq_enable();
143 - wake_up(&data.wq); /* wake up reader(s) */
145 + wake_up(&data.wq); /* wake up reader(s) */
147 interval = data.sample_window - data.sample_width;
148 do_div(interval, USEC_PER_MSEC); /* modifies interval value */
149 @@ -338,15 +337,10 @@ static int kthread_fn(void *unused)
150 mutex_unlock(&data.lock);
152 if (msleep_interruptible(interval))
158 - pr_err(BANNER "could not call stop_machine, disabling\n");
167 @@ -442,8 +436,7 @@ out:
168 * This function provides a generic read implementation for the global state
169 * "data" structure debugfs filesystem entries. It would be nice to use
170 * simple_attr_read directly, but we need to make sure that the data.lock
171 - * spinlock is held during the actual read (even though we likely won't ever
172 - * actually race here as the updater runs under a stop_machine context).
173 + * is held during the actual read.
175 static ssize_t simple_data_read(struct file *filp, char __user *ubuf,
176 size_t cnt, loff_t *ppos, const u64 *entry)
177 @@ -478,8 +471,7 @@ static ssize_t simple_data_read(struct file *filp, char __user *ubuf,
178 * This function provides a generic write implementation for the global state
179 * "data" structure debugfs filesystem entries. It would be nice to use
180 * simple_attr_write directly, but we need to make sure that the data.lock
181 - * spinlock is held during the actual write (even though we likely won't ever
182 - * actually race here as the updater runs under a stop_machine context).
183 + * is held during the actual write.
185 static ssize_t simple_data_write(struct file *filp, const char __user *ubuf,
186 size_t cnt, loff_t *ppos, u64 *entry)