1 From 0080f28fa00ac8120b0f3f65f9169f3c083a0882 Mon Sep 17 00:00:00 2001
2 From: Steven Rostedt <rostedt@goodmis.org>
3 Date: Tue, 4 Mar 2014 12:28:32 -0500
4 Subject: [PATCH 163/366] cpu_chill: Add a UNINTERRUPTIBLE hrtimer_nanosleep
6 We hit another bug that was caused by switching cpu_chill() from
7 msleep() to hrtimer_nanosleep().
9 This time it is a livelock. The problem is that hrtimer_nanosleep()
10 calls schedule with the state == TASK_INTERRUPTIBLE. But this means
11 that if a signal is pending, the scheduler won't schedule, and will
12 simply change the current task state back to TASK_RUNNING. This
13 nullifies the whole point of cpu_chill() in the first place. That is,
14 if a task is spinning on a try_lock() and it preempted the owner of the
15 lock, if it has a signal pending, it will never give up the CPU to let
16 the owner of the lock run.
18 I made a static function __hrtimer_nanosleep() that takes a fifth
19 parameter "state", which determines the task state that the
20 nanosleep() will be in. The normal hrtimer_nanosleep() will act the
21 same, but cpu_chill() will call the __hrtimer_nanosleep() directly with
22 the TASK_UNINTERRUPTIBLE state.
24 cpu_chill() only cares that the first sleep happens, and does not care
25 about the state of the restart schedule (in hrtimer_nanosleep_restart).
27 Reported-by: Ulrich Obergfell <uobergfe@redhat.com>
28 Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
29 Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
31 kernel/time/hrtimer.c | 25 ++++++++++++++++++-------
32 1 file changed, 18 insertions(+), 7 deletions(-)
34 diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
35 index 2fc2569..4f54506 100644
36 --- a/kernel/time/hrtimer.c
37 +++ b/kernel/time/hrtimer.c
38 @@ -1681,12 +1681,13 @@ void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
40 EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
42 -static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode)
43 +static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode,
44 + unsigned long state)
46 hrtimer_init_sleeper(t, current);
49 - set_current_state(TASK_INTERRUPTIBLE);
50 + set_current_state(state);
51 hrtimer_start_expires(&t->timer, mode);
54 @@ -1728,7 +1729,8 @@ long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
56 hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires);
58 - if (do_nanosleep(&t, HRTIMER_MODE_ABS))
59 + /* cpu_chill() does not care about restart state. */
60 + if (do_nanosleep(&t, HRTIMER_MODE_ABS, TASK_INTERRUPTIBLE))
63 rmtp = restart->nanosleep.rmtp;
64 @@ -1745,8 +1747,10 @@ out:
68 -long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
69 - const enum hrtimer_mode mode, const clockid_t clockid)
71 +__hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
72 + const enum hrtimer_mode mode, const clockid_t clockid,
73 + unsigned long state)
75 struct restart_block *restart;
76 struct hrtimer_sleeper t;
77 @@ -1759,7 +1763,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
79 hrtimer_init_on_stack(&t.timer, clockid, mode);
80 hrtimer_set_expires_range_ns(&t.timer, timespec_to_ktime(*rqtp), slack);
81 - if (do_nanosleep(&t, mode))
82 + if (do_nanosleep(&t, mode, state))
85 /* Absolute timers do not update the rmtp value and restart: */
86 @@ -1786,6 +1790,12 @@ out:
90 +long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
91 + const enum hrtimer_mode mode, const clockid_t clockid)
93 + return __hrtimer_nanosleep(rqtp, rmtp, mode, clockid, TASK_INTERRUPTIBLE);
96 SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp,
97 struct timespec __user *, rmtp)
99 @@ -1812,7 +1822,8 @@ void cpu_chill(void)
100 unsigned int freeze_flag = current->flags & PF_NOFREEZE;
102 current->flags |= PF_NOFREEZE;
103 - hrtimer_nanosleep(&tu, NULL, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
104 + __hrtimer_nanosleep(&tu, NULL, HRTIMER_MODE_REL, CLOCK_MONOTONIC,
105 + TASK_UNINTERRUPTIBLE);
107 current->flags &= ~PF_NOFREEZE;