1 From 2f13a0c567fcf0a7b1d8678fcd9dcbd5e5dfa47f Mon Sep 17 00:00:00 2001
2 From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
3 Date: Thu, 29 Aug 2013 18:21:04 +0200
4 Subject: [PATCH 127/366] ptrace: fix ptrace vs tasklist_lock race
6 As explained by Alexander Fyodorov <halcy@yandex.ru>:
8 |read_lock(&tasklist_lock) in ptrace_stop() is converted to mutex on RT kernel,
9 |and it can remove __TASK_TRACED from task->state (by moving it to
10 |task->saved_state). If parent does wait() on child followed by a sys_ptrace
11 |call, the following race can happen:
13 |- child sets __TASK_TRACED in ptrace_stop()
14 |- parent does wait() which eventually calls wait_task_stopped() and returns
16 |- child blocks on read_lock(&tasklist_lock) in ptrace_stop() and moves
17 | __TASK_TRACED flag to saved_state
18 |- parent calls sys_ptrace, which calls ptrace_check_attach() and wait_task_inactive()
20 This patch is based on his initial patch, in which an additional check is
21 added in case __TASK_TRACED has been moved to ->saved_state. The pi_lock is
22 taken in case the caller is interrupted between looking into ->state and ->saved_state.
25 Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
27 include/linux/sched.h | 48 +++++++++++++++++++++++++++++++++++++++++++++---
28 kernel/ptrace.c | 7 ++++++-
29 kernel/sched/core.c | 17 +++++++++++++++--
30 3 files changed, 66 insertions(+), 6 deletions(-)
32 diff --git a/include/linux/sched.h b/include/linux/sched.h
33 index b7657cd..2b59b02 100644
34 --- a/include/linux/sched.h
35 +++ b/include/linux/sched.h
36 @@ -243,10 +243,7 @@ extern char ___assert_task_state[1 - 2*!!(
37 TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \
38 __TASK_TRACED | EXIT_ZOMBIE | EXIT_DEAD)
40 -#define task_is_traced(task) ((task->state & __TASK_TRACED) != 0)
41 #define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0)
42 -#define task_is_stopped_or_traced(task) \
43 - ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0)
44 #define task_contributes_to_load(task) \
45 ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \
46 (task->flags & PF_FROZEN) == 0 && \
47 @@ -2998,6 +2995,51 @@ static inline int signal_pending_state(long state, struct task_struct *p)
48 return (state & TASK_INTERRUPTIBLE) || __fatal_signal_pending(p);
51 +static inline bool __task_is_stopped_or_traced(struct task_struct *task)
53 + if (task->state & (__TASK_STOPPED | __TASK_TRACED))
55 +#ifdef CONFIG_PREEMPT_RT_FULL
56 + if (task->saved_state & (__TASK_STOPPED | __TASK_TRACED))
62 +static inline bool task_is_stopped_or_traced(struct task_struct *task)
64 + bool traced_stopped;
66 +#ifdef CONFIG_PREEMPT_RT_FULL
67 + unsigned long flags;
69 + raw_spin_lock_irqsave(&task->pi_lock, flags);
70 + traced_stopped = __task_is_stopped_or_traced(task);
71 + raw_spin_unlock_irqrestore(&task->pi_lock, flags);
73 + traced_stopped = __task_is_stopped_or_traced(task);
75 + return traced_stopped;
78 +static inline bool task_is_traced(struct task_struct *task)
80 + bool traced = false;
82 + if (task->state & __TASK_TRACED)
84 +#ifdef CONFIG_PREEMPT_RT_FULL
85 + /* in case the task is sleeping on tasklist_lock */
86 + raw_spin_lock_irq(&task->pi_lock);
87 + if (task->state & __TASK_TRACED)
89 + else if (task->saved_state & __TASK_TRACED)
91 + raw_spin_unlock_irq(&task->pi_lock);
97 * cond_resched() and cond_resched_lock(): latency reduction via
98 * explicit rescheduling in places that are safe. The return
99 diff --git a/kernel/ptrace.c b/kernel/ptrace.c
100 index 3189e51..b852051 100644
101 --- a/kernel/ptrace.c
102 +++ b/kernel/ptrace.c
103 @@ -129,7 +129,12 @@ static bool ptrace_freeze_traced(struct task_struct *task)
105 spin_lock_irq(&task->sighand->siglock);
106 if (task_is_traced(task) && !__fatal_signal_pending(task)) {
107 - task->state = __TASK_TRACED;
108 + raw_spin_lock_irq(&task->pi_lock);
109 + if (task->state & __TASK_TRACED)
110 + task->state = __TASK_TRACED;
112 + task->saved_state = __TASK_TRACED;
113 + raw_spin_unlock_irq(&task->pi_lock);
116 spin_unlock_irq(&task->sighand->siglock);
117 diff --git a/kernel/sched/core.c b/kernel/sched/core.c
118 index 7df7852..7b7a0d3 100644
119 --- a/kernel/sched/core.c
120 +++ b/kernel/sched/core.c
121 @@ -1439,6 +1439,18 @@ out:
125 +static bool check_task_state(struct task_struct *p, long match_state)
127 + bool match = false;
129 + raw_spin_lock_irq(&p->pi_lock);
130 + if (p->state == match_state || p->saved_state == match_state)
132 + raw_spin_unlock_irq(&p->pi_lock);
138 * wait_task_inactive - wait for a thread to unschedule.
140 @@ -1483,7 +1495,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
141 * is actually now running somewhere else!
143 while (task_running(rq, p)) {
144 - if (match_state && unlikely(p->state != match_state))
145 + if (match_state && !check_task_state(p, match_state))
149 @@ -1498,7 +1510,8 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
150 running = task_running(rq, p);
151 queued = task_on_rq_queued(p);
153 - if (!match_state || p->state == match_state)
154 + if (!match_state || p->state == match_state ||
155 + p->saved_state == match_state)
156 ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
157 task_rq_unlock(rq, p, &flags);