From c8fb8b6dfe32d17346a70acfcd58a8022acbc869 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Date: Mon, 28 Oct 2013 09:36:37 +0100
Subject: [PATCH 126/366] rtmutex: Add RT aware ww locks

| --------------------------------------------------------------------------
| | Wound/wait tests |
| ---------------------
|                 ww api failures:  ok  |  ok  |  ok  |
|              ww contexts mixing:  ok  |  ok  |
|            finishing ww context:  ok  |  ok  |  ok  |  ok  |
|              locking mismatches:  ok  |  ok  |  ok  |
|                EDEADLK handling:  ok  |  ok  |  ok  |  ok  |  ok  |  ok  |  ok  |  ok  |  ok  |  ok  |
|          spinlock nest unlocked:  ok  |
| -----------------------------------------------------
|                                |block | try  |context|
| -----------------------------------------------------
|                         context:  ok  |  ok  |  ok  |
|                             try:  ok  |  ok  |  ok  |
|                           block:  ok  |  ok  |  ok  |
|                        spinlock:  ok  |  ok  |  ok  |
Signed-off-by: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
---
 kernel/locking/rtmutex.c | 251 ++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 226 insertions(+), 25 deletions(-)
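
Not part of the change itself, but useful context for review: a minimal
wait/wound caller sketch against the stock <linux/ww_mutex.h> API, showing
where the -EDEADLK and -EALREADY codes that the RT slow path below must now
produce come from. The ww_class and lock names are made up for illustration:

	#include <linux/kernel.h>
	#include <linux/ww_mutex.h>

	static DEFINE_WW_CLASS(my_class);	/* hypothetical class */

	static void lock_pair(struct ww_mutex *m1, struct ww_mutex *m2)
	{
		struct ww_acquire_ctx ctx;
		int ret;

		ww_acquire_init(&ctx, &my_class);

		/* nothing held yet, so this cannot return -EDEADLK */
		ret = ww_mutex_lock(m1, &ctx);
	again:
		ret = ww_mutex_lock(m2, &ctx);
		if (ret == -EDEADLK) {
			/*
			 * Wounded by an older context: drop what we hold,
			 * then sleep for the contended lock unconditionally.
			 */
			ww_mutex_unlock(m1);
			ww_mutex_lock_slow(m2, &ctx);
			swap(m1, m2);	/* m1 is the held lock again */
			goto again;
		}
		ww_acquire_done(&ctx);

		/* ... both locks held ... */

		ww_mutex_unlock(m2);
		ww_mutex_unlock(m1);
		ww_acquire_fini(&ctx);
	}

On -EDEADLK the loser backs off completely and reacquires the contended lock
with ww_mutex_lock_slow(); the wakeup of blocked waiters added in
ww_mutex_account_lock() below is what lets sleepers on an rtmutex notice that
they have to back off like this.
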
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 8275f47..7b59f63 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
 #include <linux/sched/rt.h>
 #include <linux/sched/deadline.h>
 #include <linux/timer.h>
+#include <linux/ww_mutex.h>
 
 #include "rtmutex_common.h"
 
@@ -1221,6 +1222,40 @@ EXPORT_SYMBOL(__rt_spin_lock_init);
 
 #endif /* PREEMPT_RT_FULL */
 
+#ifdef CONFIG_PREEMPT_RT_FULL
+static inline int __sched
+__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx)
+{
+	struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock);
+	struct ww_acquire_ctx *hold_ctx = ACCESS_ONCE(ww->ctx);
+
+	if (!hold_ctx)
+		return 0;
+
+	if (unlikely(ctx == hold_ctx))
+		return -EALREADY;
+
+	if (ctx->stamp - hold_ctx->stamp <= LONG_MAX &&
+	    (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) {
+#ifdef CONFIG_DEBUG_MUTEXES
+		DEBUG_LOCKS_WARN_ON(ctx->contending_lock);
+		ctx->contending_lock = ww;
+#endif
+		return -EDEADLK;
+	}
+
+	return 0;
+}
+#else
+static inline int __sched
+__mutex_lock_check_stamp(struct rt_mutex *lock, struct ww_acquire_ctx *ctx)
+{
+	BUG();
+	return 0;
+}
+#endif
+
 try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
 		     struct rt_mutex_waiter *waiter)
@@ -1478,7 +1513,8 @@ void rt_mutex_adjust_pi(struct task_struct *task)
 static int __sched
 __rt_mutex_slowlock(struct rt_mutex *lock, int state,
 		    struct hrtimer_sleeper *timeout,
-		    struct rt_mutex_waiter *waiter)
+		    struct rt_mutex_waiter *waiter,
+		    struct ww_acquire_ctx *ww_ctx)
@@ -1501,6 +1537,12 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
 
+		if (ww_ctx && ww_ctx->acquired > 0) {
+			ret = __mutex_lock_check_stamp(lock, ww_ctx);
+			if (ret)
+				break;
+		}
+
 		raw_spin_unlock(&lock->wait_lock);
 
 		debug_rt_mutex_print_deadlock(waiter);
@@ -1535,13 +1577,90 @@ static void rt_mutex_handle_deadlock(int res, int detect_deadlock,
 }
 
+static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww,
+						   struct ww_acquire_ctx *ww_ctx)
+{
+#ifdef CONFIG_DEBUG_MUTEXES
+	/*
+	 * If this WARN_ON triggers, you used ww_mutex_lock to acquire,
+	 * but released with a normal mutex_unlock in this call.
+	 *
+	 * This should never happen, always use ww_mutex_unlock.
+	 */
+	DEBUG_LOCKS_WARN_ON(ww->ctx);
+
+	/*
+	 * Not quite done after calling ww_acquire_done() ?
+	 */
+	DEBUG_LOCKS_WARN_ON(ww_ctx->done_acquire);
+
+	if (ww_ctx->contending_lock) {
+		/*
+		 * After -EDEADLK you tried to
+		 * acquire a different ww_mutex? Bad!
+		 */
+		DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock != ww);
+
+		/*
+		 * You called ww_mutex_lock after receiving -EDEADLK,
+		 * but 'forgot' to unlock everything else first?
+		 */
+		DEBUG_LOCKS_WARN_ON(ww_ctx->acquired > 0);
+		ww_ctx->contending_lock = NULL;
+	}
+
+	/*
+	 * Naughty, using a different class will lead to undefined behavior!
+	 */
+	DEBUG_LOCKS_WARN_ON(ww_ctx->ww_class != ww->ww_class);
+#endif
+	ww_ctx->acquired++;
+}
+
+#ifdef CONFIG_PREEMPT_RT_FULL
+static void ww_mutex_account_lock(struct rt_mutex *lock,
+				  struct ww_acquire_ctx *ww_ctx)
+{
+	struct ww_mutex *ww = container_of(lock, struct ww_mutex, base.lock);
+	struct rt_mutex_waiter *waiter, *n;
+
+	/*
+	 * This branch gets optimized out for the common case,
+	 * and is only important for ww_mutex_lock.
+	 */
+	ww_mutex_lock_acquired(ww, ww_ctx);
+	ww->ctx = ww_ctx;
+
+	/*
+	 * Give any possible sleeping processes the chance to wake up,
+	 * so they can recheck if they have to back off.
+	 */
+	rbtree_postorder_for_each_entry_safe(waiter, n, &lock->waiters,
+					     tree_entry) {
+		/* XXX debug rt mutex waiter wakeup */
+
+		BUG_ON(waiter->lock != lock);
+		rt_mutex_wake_waiter(waiter);
+	}
+}
+
+#else
+
+static void ww_mutex_account_lock(struct rt_mutex *lock,
+				  struct ww_acquire_ctx *ww_ctx)
+{
+	BUG();
+}
+#endif
+
 /*
  * Slow path lock function:
  */
 static int __sched
 rt_mutex_slowlock(struct rt_mutex *lock, int state,
 		  struct hrtimer_sleeper *timeout,
-		  enum rtmutex_chainwalk chwalk)
+		  enum rtmutex_chainwalk chwalk,
+		  struct ww_acquire_ctx *ww_ctx)
 {
 	struct rt_mutex_waiter waiter;
 	int ret = 0;
@@ -1552,6 +1671,8 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
 
 	/* Try to acquire the lock again: */
 	if (try_to_take_rt_mutex(lock, current, NULL)) {
+		if (ww_ctx)
+			ww_mutex_account_lock(lock, ww_ctx);
 		raw_spin_unlock(&lock->wait_lock);
 		return 0;
 	}
@@ -1566,13 +1687,23 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
 
 	if (likely(!ret))
 		/* sleep on the mutex */
-		ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);
+		ret = __rt_mutex_slowlock(lock, state, timeout, &waiter,
+					  ww_ctx);
+	else if (ww_ctx) {
+		/* ww_mutex received EDEADLK, let it become EALREADY */
+		ret = __mutex_lock_check_stamp(lock, ww_ctx);
+		BUG_ON(!ret);
+	}
 
 	if (unlikely(ret)) {
 		__set_current_state(TASK_RUNNING);
 		if (rt_mutex_has_waiters(lock))
 			remove_waiter(lock, &waiter);
-		rt_mutex_handle_deadlock(ret, chwalk, &waiter);
+		/* ww_mutex want to report EDEADLK/EALREADY, let them */
+		if (!ww_ctx)
+			rt_mutex_handle_deadlock(ret, chwalk, &waiter);
+	} else if (ww_ctx) {
+		ww_mutex_account_lock(lock, ww_ctx);
+	}
@@ -1701,31 +1832,36 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
 static inline int
 rt_mutex_fastlock(struct rt_mutex *lock, int state,
+		  struct ww_acquire_ctx *ww_ctx,
 		  int (*slowfn)(struct rt_mutex *lock, int state,
 				struct hrtimer_sleeper *timeout,
-				enum rtmutex_chainwalk chwalk))
+				enum rtmutex_chainwalk chwalk,
+				struct ww_acquire_ctx *ww_ctx))
 {
 	if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) {
 		rt_mutex_deadlock_account_lock(lock, current);
 		return 0;
 	} else
-		return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK);
+		return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK,
+			      ww_ctx);
 }
 
 static inline int
 rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
 			struct hrtimer_sleeper *timeout,
 			enum rtmutex_chainwalk chwalk,
+			struct ww_acquire_ctx *ww_ctx,
 			int (*slowfn)(struct rt_mutex *lock, int state,
 				      struct hrtimer_sleeper *timeout,
-				      enum rtmutex_chainwalk chwalk))
+				      enum rtmutex_chainwalk chwalk,
+				      struct ww_acquire_ctx *ww_ctx))
 {
 	if (chwalk == RT_MUTEX_MIN_CHAINWALK &&
 	    likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) {
 		rt_mutex_deadlock_account_lock(lock, current);
 		return 0;
 	} else
-		return slowfn(lock, state, timeout, chwalk);
+		return slowfn(lock, state, timeout, chwalk, ww_ctx);
 }
@@ -1772,7 +1908,7 @@ void __sched rt_mutex_lock(struct rt_mutex *lock)
 {
 	might_sleep();
 
-	rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock);
+	rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, NULL, rt_mutex_slowlock);
 }
 EXPORT_SYMBOL_GPL(rt_mutex_lock);
 
@@ -1789,7 +1925,7 @@ int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock)
 {
 	might_sleep();
 
-	return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock);
+	return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, NULL, rt_mutex_slowlock);
 }
 EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
 
@@ -1802,7 +1938,7 @@ int rt_mutex_timed_futex_lock(struct rt_mutex *lock,
 	might_sleep();
 
 	return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
-				       RT_MUTEX_FULL_CHAINWALK,
+				       RT_MUTEX_FULL_CHAINWALK, NULL,
 				       rt_mutex_slowlock);
 }
 
@@ -1821,7 +1957,7 @@ int __sched rt_mutex_lock_killable(struct rt_mutex *lock)
 {
 	might_sleep();
 
-	return rt_mutex_fastlock(lock, TASK_KILLABLE, rt_mutex_slowlock);
+	return rt_mutex_fastlock(lock, TASK_KILLABLE, NULL, rt_mutex_slowlock);
 }
 EXPORT_SYMBOL_GPL(rt_mutex_lock_killable);
@@ -1845,6 +1981,7 @@ rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout)
 
 	return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
 				       RT_MUTEX_MIN_CHAINWALK,
+				       NULL,
 				       rt_mutex_slowlock);
 }
 EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
@@ -2099,7 +2236,7 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
 	set_current_state(TASK_INTERRUPTIBLE);
 
 	/* sleep on the mutex */
-	ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
+	ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, NULL);
 
 	if (unlikely(ret))
 		remove_waiter(lock, waiter);
@@ -2115,24 +2252,88 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
 	return ret;
 }
 
-#ifdef CONFIG_PREEMPT_RT_FULL
-
-struct ww_acquire_ctx {
-};
-
-int __ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx)
-{
-	BUG();
-}
+static inline int
+ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
+{
+#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH
+	unsigned tmp;
+
+	if (ctx->deadlock_inject_countdown-- == 0) {
+		tmp = ctx->deadlock_inject_interval;
+		if (tmp > UINT_MAX/4)
+			tmp = UINT_MAX;
+		else
+			tmp = tmp*2 + tmp + tmp/2;
+
+		ctx->deadlock_inject_interval = tmp;
+		ctx->deadlock_inject_countdown = tmp;
+		ctx->contending_lock = lock;
+
+		ww_mutex_unlock(lock);
+
+		return -EDEADLK;
+	}
+#endif
+
+	return 0;
+}
-EXPORT_SYMBOL_GPL(__ww_mutex_lock);
-
-int __ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx)
-{
-	BUG();
-}
+
+#ifdef CONFIG_PREEMPT_RT_FULL
+int __sched
+__ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx)
+{
+	int ret;
+
+	might_sleep();
+
+	mutex_acquire_nest(&lock->base.dep_map, 0, 0, &ww_ctx->dep_map, _RET_IP_);
+	ret = rt_mutex_slowlock(&lock->base.lock, TASK_INTERRUPTIBLE, NULL, 0, ww_ctx);
+	if (ret)
+		mutex_release(&lock->base.dep_map, 1, _RET_IP_);
+	else if (!ret && ww_ctx->acquired > 1)
+		return ww_mutex_deadlock_injection(lock, ww_ctx);
+
+	return ret;
+}
 EXPORT_SYMBOL_GPL(__ww_mutex_lock_interruptible);
 
+int __sched
+__ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx)
+{
+	int ret;
+
+	might_sleep();
+
+	mutex_acquire_nest(&lock->base.dep_map, 0, 0, &ww_ctx->dep_map, _RET_IP_);
+	ret = rt_mutex_slowlock(&lock->base.lock, TASK_UNINTERRUPTIBLE, NULL, 0, ww_ctx);
+	if (ret)
+		mutex_release(&lock->base.dep_map, 1, _RET_IP_);
+	else if (!ret && ww_ctx->acquired > 1)
+		return ww_mutex_deadlock_injection(lock, ww_ctx);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(__ww_mutex_lock);
+
 void __sched ww_mutex_unlock(struct ww_mutex *lock)
 {
-	BUG();
+	int nest = !!lock->ctx;
+
+	/*
+	 * The unlocking fastpath is the 0->1 transition from 'locked'
+	 * into 'unlocked' state:
+	 */
+	if (nest) {
+#ifdef CONFIG_DEBUG_MUTEXES
+		DEBUG_LOCKS_WARN_ON(!lock->ctx->acquired);
+#endif
+		if (lock->ctx->acquired > 0)
+			lock->ctx->acquired--;
+		lock->ctx = NULL;
+	}
+
+	mutex_release(&lock->base.dep_map, nest, _RET_IP_);
+	rt_mutex_unlock(&lock->base.lock);
 }
-EXPORT_SYMBOL_GPL(ww_mutex_unlock);
+EXPORT_SYMBOL(ww_mutex_unlock);
+#endif
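
For reference, the result table in the changelog is the boot-time output of
the locking API self-tests (lib/locking-selftest.c). A config sketch to
reproduce it, assuming a tree with this series applied:

	CONFIG_PREEMPT_RT_FULL=y
	CONFIG_DEBUG_LOCKING_API_SELFTESTS=y

The "Wound/wait tests" block then shows up in dmesg early during boot.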