1 From 297c2542280631e1640fe91fe2e01511ae232c9f Mon Sep 17 00:00:00 2001
2 From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
3 Date: Mon, 4 Nov 2013 13:21:10 -0800
4 Subject: [PATCH 235/366] rcu: Eliminate softirq processing from rcutree
6 Running RCU out of softirq is a problem for some workloads that would
7 like to manage RCU core processing independently of other softirq work,
8 for example, setting kthread priority. This commit therefore moves the
9 RCU core work from softirq to a per-CPU/per-flavor SCHED_OTHER kthread
10 named rcuc. The SCHED_OTHER approach avoids the scalability problems
11 that appeared with the earlier attempt to move RCU core processing
12 from softirq to kthreads. That said, kernels built with RCU_BOOST=y
13 will run the rcuc kthreads at the RCU-boosting priority.
15 Reported-by: Thomas Gleixner <tglx@linutronix.de>
16 Tested-by: Mike Galbraith <bitbucket@online.de>
17 Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
18 Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
20 kernel/rcu/tree.c | 112 ++++++++++++++++++++++++++++++----
21 kernel/rcu/tree.h | 5 +-
22 kernel/rcu/tree_plugin.h | 153 +++++++----------------------------------------
23 3 files changed, 123 insertions(+), 147 deletions(-)
25 diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
26 index a0b2db4..5359091 100644
27 --- a/kernel/rcu/tree.c
28 +++ b/kernel/rcu/tree.c
30 #include <linux/random.h>
31 #include <linux/trace_events.h>
32 #include <linux/suspend.h>
33 +#include <linux/delay.h>
34 +#include <linux/gfp.h>
35 +#include <linux/oom.h>
36 +#include <linux/smpboot.h>
37 +#include "../time/tick-internal.h"
41 @@ -2956,18 +2961,17 @@ __rcu_process_callbacks(struct rcu_state *rsp)
43 * Do RCU core processing for the current CPU.
45 -static void rcu_process_callbacks(struct softirq_action *unused)
46 +static void rcu_process_callbacks(void)
48 struct rcu_state *rsp;
50 if (cpu_is_offline(smp_processor_id()))
52 - trace_rcu_utilization(TPS("Start RCU core"));
53 for_each_rcu_flavor(rsp)
54 __rcu_process_callbacks(rsp);
55 - trace_rcu_utilization(TPS("End RCU core"));
58 +static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
60 * Schedule RCU callback invocation. If the specified type of RCU
61 * does not support RCU priority boosting, just do a direct call,
62 @@ -2979,18 +2983,105 @@ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
64 if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
66 - if (likely(!rsp->boost)) {
67 - rcu_do_batch(rsp, rdp);
70 - invoke_rcu_callbacks_kthread();
71 + rcu_do_batch(rsp, rdp);
74 +static void rcu_wake_cond(struct task_struct *t, int status)
77 + * If the thread is yielding, only wake it when this
78 + * is invoked from idle
80 + if (t && (status != RCU_KTHREAD_YIELDING || is_idle_task(current)))
85 + * Wake up this CPU's rcuc kthread to do RCU core processing.
87 static void invoke_rcu_core(void)
89 - if (cpu_online(smp_processor_id()))
90 - raise_softirq(RCU_SOFTIRQ);
91 + unsigned long flags;
92 + struct task_struct *t;
94 + if (!cpu_online(smp_processor_id()))
96 + local_irq_save(flags);
97 + __this_cpu_write(rcu_cpu_has_work, 1);
98 + t = __this_cpu_read(rcu_cpu_kthread_task);
99 + if (t != NULL && current != t)
100 + rcu_wake_cond(t, __this_cpu_read(rcu_cpu_kthread_status));
101 + local_irq_restore(flags);
104 +static void rcu_cpu_kthread_park(unsigned int cpu)
106 + per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
109 +static int rcu_cpu_kthread_should_run(unsigned int cpu)
111 + return __this_cpu_read(rcu_cpu_has_work);
115 + * Per-CPU kernel thread that invokes RCU callbacks. This replaces the
116 + * RCU softirq used in flavors and configurations of RCU that do not
117 + * support RCU priority boosting.
119 +static void rcu_cpu_kthread(unsigned int cpu)
121 + unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status);
122 + char work, *workp = this_cpu_ptr(&rcu_cpu_has_work);
125 + for (spincnt = 0; spincnt < 10; spincnt++) {
126 + trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
127 + local_bh_disable();
128 + *statusp = RCU_KTHREAD_RUNNING;
129 + this_cpu_inc(rcu_cpu_kthread_loops);
130 + local_irq_disable();
133 + local_irq_enable();
135 + rcu_process_callbacks();
138 + trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
139 + *statusp = RCU_KTHREAD_WAITING;
143 + *statusp = RCU_KTHREAD_YIELDING;
144 + trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
145 + schedule_timeout_interruptible(2);
146 + trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
147 + *statusp = RCU_KTHREAD_WAITING;
150 +static struct smp_hotplug_thread rcu_cpu_thread_spec = {
151 + .store = &rcu_cpu_kthread_task,
152 + .thread_should_run = rcu_cpu_kthread_should_run,
153 + .thread_fn = rcu_cpu_kthread,
154 + .thread_comm = "rcuc/%u",
155 + .setup = rcu_cpu_kthread_setup,
156 + .park = rcu_cpu_kthread_park,
160 + * Spawn per-CPU RCU core processing kthreads.
162 +static int __init rcu_spawn_core_kthreads(void)
166 + for_each_possible_cpu(cpu)
167 + per_cpu(rcu_cpu_has_work, cpu) = 0;
168 + BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));
171 +early_initcall(rcu_spawn_core_kthreads);
174 * Handle any core-RCU processing required by a call_rcu() invocation.
175 @@ -4609,7 +4700,6 @@ void __init rcu_init(void)
177 rcu_dump_rcu_node_tree(&rcu_sched_state);
178 __rcu_init_preempt();
179 - open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
182 * We don't need protection against CPU-hotplug here because
183 diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
184 index e7e0939..588509d 100644
185 --- a/kernel/rcu/tree.h
186 +++ b/kernel/rcu/tree.h
187 @@ -563,12 +563,10 @@ extern struct rcu_state rcu_bh_state;
188 extern struct rcu_state rcu_preempt_state;
189 #endif /* #ifdef CONFIG_PREEMPT_RCU */
191 -#ifdef CONFIG_RCU_BOOST
192 DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
193 DECLARE_PER_CPU(int, rcu_cpu_kthread_cpu);
194 DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
195 DECLARE_PER_CPU(char, rcu_cpu_has_work);
196 -#endif /* #ifdef CONFIG_RCU_BOOST */
198 #ifndef RCU_TREE_NONCORE
200 @@ -588,10 +586,9 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func);
201 static void __init __rcu_init_preempt(void);
202 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
203 static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
204 -static void invoke_rcu_callbacks_kthread(void);
205 static bool rcu_is_callbacks_kthread(void);
206 +static void rcu_cpu_kthread_setup(unsigned int cpu);
207 #ifdef CONFIG_RCU_BOOST
208 -static void rcu_preempt_do_callbacks(void);
209 static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
210 struct rcu_node *rnp);
211 #endif /* #ifdef CONFIG_RCU_BOOST */
212 diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
213 index 86af123..246cbd8 100644
214 --- a/kernel/rcu/tree_plugin.h
215 +++ b/kernel/rcu/tree_plugin.h
217 * Paul E. McKenney <paulmck@linux.vnet.ibm.com>
220 -#include <linux/delay.h>
221 -#include <linux/gfp.h>
222 -#include <linux/oom.h>
223 -#include <linux/smpboot.h>
224 -#include <linux/jiffies.h>
225 -#include "../time/tick-internal.h"
227 #ifdef CONFIG_RCU_BOOST
229 #include "../locking/rtmutex_common.h"
232 - * Control variables for per-CPU and per-rcu_node kthreads. These
233 - * handle all flavors of RCU.
235 -static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
236 -DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
237 -DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
238 -DEFINE_PER_CPU(char, rcu_cpu_has_work);
240 #else /* #ifdef CONFIG_RCU_BOOST */
243 @@ -56,6 +40,14 @@ DEFINE_PER_CPU(char, rcu_cpu_has_work);
245 #endif /* #else #ifdef CONFIG_RCU_BOOST */
248 + * Control variables for per-CPU and per-rcu_node kthreads. These
249 + * handle all flavors of RCU.
251 +DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
252 +DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
253 +DEFINE_PER_CPU(char, rcu_cpu_has_work);
255 #ifdef CONFIG_RCU_NOCB_CPU
256 static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
257 static bool have_rcu_nocb_mask; /* Was rcu_nocb_mask allocated? */
258 @@ -646,15 +638,6 @@ static void rcu_preempt_check_callbacks(void)
259 t->rcu_read_unlock_special.b.need_qs = true;
262 -#ifdef CONFIG_RCU_BOOST
264 -static void rcu_preempt_do_callbacks(void)
266 - rcu_do_batch(rcu_state_p, this_cpu_ptr(rcu_data_p));
269 -#endif /* #ifdef CONFIG_RCU_BOOST */
272 * Queue a preemptible-RCU callback for invocation after a grace period.
274 @@ -931,6 +914,19 @@ void exit_rcu(void)
276 #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
279 + * If boosting, set rcuc kthreads to realtime priority.
281 +static void rcu_cpu_kthread_setup(unsigned int cpu)
283 +#ifdef CONFIG_RCU_BOOST
284 + struct sched_param sp;
286 + sp.sched_priority = kthread_prio;
287 + sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
288 +#endif /* #ifdef CONFIG_RCU_BOOST */
291 #ifdef CONFIG_RCU_BOOST
293 #include "../locking/rtmutex_common.h"
294 @@ -962,16 +958,6 @@ static void rcu_initiate_boost_trace(struct rcu_node *rnp)
296 #endif /* #else #ifdef CONFIG_RCU_TRACE */
298 -static void rcu_wake_cond(struct task_struct *t, int status)
301 - * If the thread is yielding, only wake it when this
302 - * is invoked from idle
304 - if (status != RCU_KTHREAD_YIELDING || is_idle_task(current))
305 - wake_up_process(t);
309 * Carry out RCU priority boosting on the task indicated by ->exp_tasks
310 * or ->boost_tasks, advancing the pointer to the next task in the
311 @@ -1116,23 +1102,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
315 - * Wake up the per-CPU kthread to invoke RCU callbacks.
317 -static void invoke_rcu_callbacks_kthread(void)
319 - unsigned long flags;
321 - local_irq_save(flags);
322 - __this_cpu_write(rcu_cpu_has_work, 1);
323 - if (__this_cpu_read(rcu_cpu_kthread_task) != NULL &&
324 - current != __this_cpu_read(rcu_cpu_kthread_task)) {
325 - rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task),
326 - __this_cpu_read(rcu_cpu_kthread_status));
328 - local_irq_restore(flags);
332 * Is the current CPU running the RCU-callbacks kthread?
333 * Caller must have preemption disabled.
335 @@ -1187,67 +1156,6 @@ static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
339 -static void rcu_kthread_do_work(void)
341 - rcu_do_batch(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data));
342 - rcu_do_batch(&rcu_bh_state, this_cpu_ptr(&rcu_bh_data));
343 - rcu_preempt_do_callbacks();
346 -static void rcu_cpu_kthread_setup(unsigned int cpu)
348 - struct sched_param sp;
350 - sp.sched_priority = kthread_prio;
351 - sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
354 -static void rcu_cpu_kthread_park(unsigned int cpu)
356 - per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
359 -static int rcu_cpu_kthread_should_run(unsigned int cpu)
361 - return __this_cpu_read(rcu_cpu_has_work);
365 - * Per-CPU kernel thread that invokes RCU callbacks. This replaces the
366 - * RCU softirq used in flavors and configurations of RCU that do not
367 - * support RCU priority boosting.
369 -static void rcu_cpu_kthread(unsigned int cpu)
371 - unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status);
372 - char work, *workp = this_cpu_ptr(&rcu_cpu_has_work);
375 - for (spincnt = 0; spincnt < 10; spincnt++) {
376 - trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
377 - local_bh_disable();
378 - *statusp = RCU_KTHREAD_RUNNING;
379 - this_cpu_inc(rcu_cpu_kthread_loops);
380 - local_irq_disable();
383 - local_irq_enable();
385 - rcu_kthread_do_work();
388 - trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
389 - *statusp = RCU_KTHREAD_WAITING;
393 - *statusp = RCU_KTHREAD_YIELDING;
394 - trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
395 - schedule_timeout_interruptible(2);
396 - trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
397 - *statusp = RCU_KTHREAD_WAITING;
401 * Set the per-rcu_node kthread's affinity to cover all CPUs that are
402 * served by the rcu_node in question. The CPU hotplug lock is still
403 @@ -1277,26 +1185,12 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
404 free_cpumask_var(cm);
407 -static struct smp_hotplug_thread rcu_cpu_thread_spec = {
408 - .store = &rcu_cpu_kthread_task,
409 - .thread_should_run = rcu_cpu_kthread_should_run,
410 - .thread_fn = rcu_cpu_kthread,
411 - .thread_comm = "rcuc/%u",
412 - .setup = rcu_cpu_kthread_setup,
413 - .park = rcu_cpu_kthread_park,
417 * Spawn boost kthreads -- called as soon as the scheduler is running.
419 static void __init rcu_spawn_boost_kthreads(void)
421 struct rcu_node *rnp;
424 - for_each_possible_cpu(cpu)
425 - per_cpu(rcu_cpu_has_work, cpu) = 0;
426 - BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));
427 rcu_for_each_leaf_node(rcu_state_p, rnp)
428 (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp);
430 @@ -1319,11 +1213,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
431 raw_spin_unlock_irqrestore(&rnp->lock, flags);
434 -static void invoke_rcu_callbacks_kthread(void)
439 static bool rcu_is_callbacks_kthread(void)