1 From a2b43ee78d107157684dce9f4b80168c6e7b488b Mon Sep 17 00:00:00 2001
2 From: John Stultz <johnstul@us.ibm.com>
3 Date: Fri, 3 Jul 2009 08:29:58 -0500
4 Subject: [PATCH 087/366] posix-timers: Thread posix-cpu-timers on -rt
6 posix-cpu-timer code takes non -rt safe locks in hard irq
7 context. Move it to a thread.
9 [ 3.0 fixes from Peter Zijlstra <peterz@infradead.org> ]
11 Signed-off-by: John Stultz <johnstul@us.ibm.com>
12 Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
14 include/linux/init_task.h | 7 ++
15 include/linux/sched.h | 3 +
17 kernel/time/posix-cpu-timers.c | 193 ++++++++++++++++++++++++++++++++++++++++-
18 4 files changed, 202 insertions(+), 4 deletions(-)
20 diff --git a/include/linux/init_task.h b/include/linux/init_task.h
21 index 6b42f297..d8ec0f2 100644
22 --- a/include/linux/init_task.h
23 +++ b/include/linux/init_task.h
24 @@ -148,6 +148,12 @@ extern struct task_group root_task_group;
25 # define INIT_PERF_EVENTS(tsk)
28 +#ifdef CONFIG_PREEMPT_RT_BASE
29 +# define INIT_TIMER_LIST .posix_timer_list = NULL,
31 +# define INIT_TIMER_LIST
34 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
35 # define INIT_VTIME(tsk) \
36 .vtime_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.vtime_lock), \
37 @@ -240,6 +246,7 @@ extern struct task_group root_task_group;
38 .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \
39 .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \
40 .timer_slack_ns = 50000, /* 50 usec default slack */ \
43 [PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID), \
44 [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), \
45 diff --git a/include/linux/sched.h b/include/linux/sched.h
46 index fb5ee52..668b2cb 100644
47 --- a/include/linux/sched.h
48 +++ b/include/linux/sched.h
49 @@ -1556,6 +1556,9 @@ struct task_struct {
51 struct task_cputime cputime_expires;
52 struct list_head cpu_timers[3];
53 +#ifdef CONFIG_PREEMPT_RT_BASE
54 + struct task_struct *posix_timer_list;
57 /* process credentials */
58 const struct cred __rcu *real_cred; /* objective and real subjective task
59 diff --git a/kernel/fork.c b/kernel/fork.c
60 index 81550ca..3256c9e 100644
63 @@ -1239,6 +1239,9 @@ static void rt_mutex_init_task(struct task_struct *p)
65 static void posix_cpu_timers_init(struct task_struct *tsk)
67 +#ifdef CONFIG_PREEMPT_RT_BASE
68 + tsk->posix_timer_list = NULL;
70 tsk->cputime_expires.prof_exp = 0;
71 tsk->cputime_expires.virt_exp = 0;
72 tsk->cputime_expires.sched_exp = 0;
73 diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
74 index 80016b3..b7342b6 100644
75 --- a/kernel/time/posix-cpu-timers.c
76 +++ b/kernel/time/posix-cpu-timers.c
80 #include <linux/sched.h>
81 +#include <linux/sched/rt.h>
82 #include <linux/posix-timers.h>
83 #include <linux/errno.h>
84 #include <linux/math64.h>
85 @@ -650,7 +651,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
87 * Disarm any old timer after extracting its expiry time.
89 - WARN_ON_ONCE(!irqs_disabled());
90 + WARN_ON_ONCE_NONRT(!irqs_disabled());
93 old_incr = timer->it.cpu.incr;
94 @@ -1092,7 +1093,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
96 * Now re-arm for the new expiry time.
98 - WARN_ON_ONCE(!irqs_disabled());
99 + WARN_ON_ONCE_NONRT(!irqs_disabled());
101 unlock_task_sighand(p, &flags);
103 @@ -1183,13 +1184,13 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
104 * already updated our counts. We need to check if any timers fire now.
105 * Interrupts are disabled.
107 -void run_posix_cpu_timers(struct task_struct *tsk)
108 +static void __run_posix_cpu_timers(struct task_struct *tsk)
111 struct k_itimer *timer, *next;
114 - WARN_ON_ONCE(!irqs_disabled());
115 + WARN_ON_ONCE_NONRT(!irqs_disabled());
118 * The fast path checks that there are no expired thread or thread
119 @@ -1243,6 +1244,190 @@ void run_posix_cpu_timers(struct task_struct *tsk)
123 +#ifdef CONFIG_PREEMPT_RT_BASE
124 +#include <linux/kthread.h>
125 +#include <linux/cpu.h>
126 +DEFINE_PER_CPU(struct task_struct *, posix_timer_task);
127 +DEFINE_PER_CPU(struct task_struct *, posix_timer_tasklist);
129 +static int posix_cpu_timers_thread(void *data)
131 + int cpu = (long)data;
133 + BUG_ON(per_cpu(posix_timer_task,cpu) != current);
135 + while (!kthread_should_stop()) {
136 + struct task_struct *tsk = NULL;
137 + struct task_struct *next = NULL;
139 + if (cpu_is_offline(cpu))
142 + /* grab task list */
143 + raw_local_irq_disable();
144 + tsk = per_cpu(posix_timer_tasklist, cpu);
145 + per_cpu(posix_timer_tasklist, cpu) = NULL;
146 + raw_local_irq_enable();
148 + /* it's possible the list is empty, just return */
150 + set_current_state(TASK_INTERRUPTIBLE);
152 + __set_current_state(TASK_RUNNING);
156 + /* Process task list */
159 + next = tsk->posix_timer_list;
161 + /* run the task timers, clear its ptr and
164 + __run_posix_cpu_timers(tsk);
165 + tsk->posix_timer_list = NULL;
166 + put_task_struct(tsk);
168 + /* check if this is the last on the list */
177 + /* Wait for kthread_stop */
178 + set_current_state(TASK_INTERRUPTIBLE);
179 + while (!kthread_should_stop()) {
181 + set_current_state(TASK_INTERRUPTIBLE);
183 + __set_current_state(TASK_RUNNING);
187 +static inline int __fastpath_timer_check(struct task_struct *tsk)
189 + /* tsk == current, ensure it is safe to use ->signal/sighand */
190 + if (unlikely(tsk->exit_state))
193 + if (!task_cputime_zero(&tsk->cputime_expires))
196 + if (!task_cputime_zero(&tsk->signal->cputime_expires))
202 +void run_posix_cpu_timers(struct task_struct *tsk)
204 + unsigned long cpu = smp_processor_id();
205 + struct task_struct *tasklist;
207 + BUG_ON(!irqs_disabled());
208 + if(!per_cpu(posix_timer_task, cpu))
210 + /* get per-cpu references */
211 + tasklist = per_cpu(posix_timer_tasklist, cpu);
213 + /* check to see if we're already queued */
214 + if (!tsk->posix_timer_list && __fastpath_timer_check(tsk)) {
215 + get_task_struct(tsk);
217 + tsk->posix_timer_list = tasklist;
220 + * The list is terminated by a self-pointing
223 + tsk->posix_timer_list = tsk;
225 + per_cpu(posix_timer_tasklist, cpu) = tsk;
227 + wake_up_process(per_cpu(posix_timer_task, cpu));
232 + * posix_cpu_thread_call - callback that gets triggered when a CPU is added.
233 + * Here we can start up the necessary migration thread for the new CPU.
235 +static int posix_cpu_thread_call(struct notifier_block *nfb,
236 + unsigned long action, void *hcpu)
238 + int cpu = (long)hcpu;
239 + struct task_struct *p;
240 + struct sched_param param;
243 + case CPU_UP_PREPARE:
244 + p = kthread_create(posix_cpu_timers_thread, hcpu,
245 + "posixcputmr/%d",cpu);
248 + p->flags |= PF_NOFREEZE;
249 + kthread_bind(p, cpu);
250 + /* Must be high prio to avoid getting starved */
251 + param.sched_priority = MAX_RT_PRIO-1;
252 + sched_setscheduler(p, SCHED_FIFO, &param);
253 + per_cpu(posix_timer_task,cpu) = p;
256 + /* Strictly unnecessary, as first user will wake it. */
257 + wake_up_process(per_cpu(posix_timer_task,cpu));
259 +#ifdef CONFIG_HOTPLUG_CPU
260 + case CPU_UP_CANCELED:
261 + /* Unbind it from offline cpu so it can run. Fall thru. */
262 + kthread_bind(per_cpu(posix_timer_task, cpu),
263 + cpumask_any(cpu_online_mask));
264 + kthread_stop(per_cpu(posix_timer_task,cpu));
265 + per_cpu(posix_timer_task,cpu) = NULL;
268 + kthread_stop(per_cpu(posix_timer_task,cpu));
269 + per_cpu(posix_timer_task,cpu) = NULL;
276 +/* Register at highest priority so that task migration (migrate_all_tasks)
277 + * happens before everything else.
279 +static struct notifier_block posix_cpu_thread_notifier = {
280 + .notifier_call = posix_cpu_thread_call,
284 +static int __init posix_cpu_thread_init(void)
286 + void *hcpu = (void *)(long)smp_processor_id();
287 + /* Start one for boot CPU. */
290 + /* init the per-cpu posix_timer_tasklets */
291 + for_each_possible_cpu(cpu)
292 + per_cpu(posix_timer_tasklist, cpu) = NULL;
294 + posix_cpu_thread_call(&posix_cpu_thread_notifier, CPU_UP_PREPARE, hcpu);
295 + posix_cpu_thread_call(&posix_cpu_thread_notifier, CPU_ONLINE, hcpu);
296 + register_cpu_notifier(&posix_cpu_thread_notifier);
299 +early_initcall(posix_cpu_thread_init);
300 +#else /* CONFIG_PREEMPT_RT_BASE */
301 +void run_posix_cpu_timers(struct task_struct *tsk)
303 + __run_posix_cpu_timers(tsk);
305 +#endif /* CONFIG_PREEMPT_RT_BASE */
308 * Set one of the process-wide special case CPU timers or RLIMIT_CPU.
309 * The tsk->sighand->siglock must be held by the caller.