/* Linuxthreads - a simple clone()-based implementation of Posix        */
/* threads for Linux.                                                   */
/* Copyright (C) 1998 Xavier Leroy (Xavier.Leroy@inria.fr)              */
/*                                                                      */
/* This program is free software; you can redistribute it and/or        */
/* modify it under the terms of the GNU Library General Public License  */
/* as published by the Free Software Foundation; either version 2       */
/* of the License, or (at your option) any later version.               */
/*                                                                      */
/* This program is distributed in the hope that it will be useful,      */
/* but WITHOUT ANY WARRANTY; without even the implied warranty of       */
/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        */
/* GNU Library General Public License for more details.                 */

#include <errno.h>
//l4/#include <sched.h>
#include <time.h>
#include <stdlib.h>
#include <limits.h>
#include "pthread.h"
#include "internals.h"
#include "spinlock.h"
#include "restart.h"

#include <l4/sys/thread.h>
#include <l4/util/util.h>
static void __pthread_acquire(int * spinlock);

static __inline__ void __pthread_release(int * spinlock)
{
  WRITE_MEMORY_BARRIER();
  *spinlock = __LT_SPINLOCK_INIT;
  __asm__ __volatile__ ("" : "=m" (*spinlock) : "m" (*spinlock));
}
/* The status field of a spinlock is a pointer whose least significant
   bit must be cleared off to convert it to a pthread_descr.
   Conversely, if you add a 1 bit to a pthread_descr, you get a spinlock
   status value.

   Thus the field values have the following meanings:

   status == 0:       spinlock is free
   status == 1:       spinlock is taken; no thread is waiting on it

   (status & 1) == 1: spinlock is taken and (status & ~1L) is a
                      pointer to the first waiting thread; other
                      waiting threads are linked via the p_nextlock
                      field.
   (status & 1) == 0: same as above, but spinlock is not taken.

   The waiting list is not sorted by priority order.
   Actually, we always insert at top of list (sole insertion mode
   that can be performed without locking).
   For __pthread_unlock, we perform a linear search in the list
   to find the highest-priority, oldest waiting thread.
   This is safe because there are no concurrent __pthread_unlock
   operations -- only the thread that locked the mutex can unlock it. */
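
/* Illustrative sketch only, not part of the implementation: with the
   encoding above, enqueueing a waiter `self` while keeping the lock marked
   as taken, and later recovering the head of the wait list, looks roughly
   like this.  All identifiers besides the documented status field are
   hypothetical.

       long status = (long) self | 1;                       // taken, `self` waits first
       pthread_descr first = (pthread_descr) (status & ~1L); // == self
       int taken = (status & 1) != 0;                        // is the lock held?
*/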
void internal_function __pthread_lock(struct _pthread_fastlock * lock,
                                      pthread_descr self)
{
#if defined HAS_COMPARE_AND_SWAP
  long oldstatus, newstatus;
  int successful_seizure, spurious_wakeup_count;
  int spin_count;
#endif

#if defined TEST_FOR_COMPARE_AND_SWAP
  if (!__pthread_has_cas)
#endif
#if !defined HAS_COMPARE_AND_SWAP || defined TEST_FOR_COMPARE_AND_SWAP
  {
    __pthread_acquire(&lock->__spinlock);
    return;
  }
#endif

#if defined HAS_COMPARE_AND_SWAP
  /* First try it without preparation.  Maybe it's a completely
     uncontested lock.  */
  if (lock->__status == 0 && __compare_and_swap (&lock->__status, 0, 1))
    return;

  spurious_wakeup_count = 0;
  spin_count = 0;
  /* On SMP, try spinning to get the lock. */

  if (__pthread_smp_kernel) {
    int max_count = lock->__spinlock * 2 + 10;

    if (max_count > MAX_ADAPTIVE_SPIN_COUNT)
      max_count = MAX_ADAPTIVE_SPIN_COUNT;

    for (spin_count = 0; spin_count < max_count; spin_count++) {
      if (((oldstatus = lock->__status) & 1) == 0) {
        if (__compare_and_swap(&lock->__status, oldstatus, oldstatus | 1))
        {
          if (spin_count)
            lock->__spinlock += (spin_count - lock->__spinlock) / 8;
          READ_MEMORY_BARRIER();
          return;
        }
      }
#ifdef BUSY_WAIT_NOP
      BUSY_WAIT_NOP;
#endif
      __asm__ __volatile__ ("" : "=m" (lock->__status) : "m" (lock->__status));
    }

    lock->__spinlock += (spin_count - lock->__spinlock) / 8;
  }
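
  /* The two __spinlock updates above keep an exponential moving average of
     recent spin counts (avg += (sample - avg) / 8).  The next contended
     acquisition then spins up to about twice that average, capped at
     MAX_ADAPTIVE_SPIN_COUNT, before falling back to suspension. */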
again:

  /* No luck, try once more or suspend. */

  do {
    oldstatus = lock->__status;
    successful_seizure = 0;

    if ((oldstatus & 1) == 0) {
      newstatus = oldstatus | 1;
      successful_seizure = 1;
    } else {
      if (self == NULL)
        self = thread_self();
      newstatus = (long) self | 1;
    }

    if (self != NULL) {
      THREAD_SETMEM(self, p_nextlock, (pthread_descr) (oldstatus));
      /* Make sure the store in p_nextlock completes before performing
         the compare-and-swap */
      MEMORY_BARRIER();
    }
  } while (! __compare_and_swap(&lock->__status, oldstatus, newstatus));
  /* Suspend with guard against spurious wakeup.
     This can happen in pthread_cond_timedwait_relative, when the thread
     wakes up due to timeout and is still on the condvar queue, and then
     locks the queue to remove itself. At that point it may still be on the
     queue, and may be resumed by a condition signal. */
  if (!successful_seizure) {
    for (;;) {
      suspend(self);
      if (self->p_nextlock != NULL) {
        /* Count resumes that don't belong to us. */
        spurious_wakeup_count++;
        continue;
      }
      break;
    }
    goto again;
  }

  /* Put back any resumes we caught that don't belong to us. */
  while (spurious_wakeup_count--)
    restart(self);

  READ_MEMORY_BARRIER();
#endif
}
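
/* Sketch of typical use, for orientation only: pthread_mutex_lock() and
   friends wrap these primitives around the fastlock embedded in the mutex.
   The field name below follows linuxthreads conventions and is illustrative
   here, not taken from this file.

       __pthread_lock(&mutex->__m_lock, thread_self());
       ... critical section ...
       __pthread_unlock(&mutex->__m_lock);
*/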
int __pthread_unlock(struct _pthread_fastlock * lock)
{
#if defined HAS_COMPARE_AND_SWAP
  long oldstatus;
  pthread_descr thr, * ptr, * maxptr;
  int maxprio;
#endif

#if defined TEST_FOR_COMPARE_AND_SWAP
  if (!__pthread_has_cas)
#endif
#if !defined HAS_COMPARE_AND_SWAP || defined TEST_FOR_COMPARE_AND_SWAP
  {
    __pthread_release(&lock->__spinlock);
    return 0;
  }
#endif

#if defined HAS_COMPARE_AND_SWAP
  WRITE_MEMORY_BARRIER();

again:
  while ((oldstatus = lock->__status) == 1) {
    if (__compare_and_swap_with_release_semantics(&lock->__status,
        oldstatus, 0))
      return 0;
  }
  /* Find thread in waiting queue with maximal priority */
  ptr = (pthread_descr *) &lock->__status;
  thr = (pthread_descr) (oldstatus & ~1L);
  maxprio = 0;
  maxptr = ptr;

  /* Before we iterate over the wait queue, we need to execute
     a read barrier, otherwise we may read stale contents of nodes that may
     just have been inserted by other processors. One read barrier is enough to
     ensure we have a stable list; we don't need one for each pointer chase
     through the list, because we are the owner of the lock; other threads
     can only add nodes at the front; if a front node is consistent,
     the ones behind it must also be. */

  READ_MEMORY_BARRIER();

  while (thr != 0) {
    if (thr->p_priority >= maxprio) {
      maxptr = ptr;
      maxprio = thr->p_priority;
    }
    ptr = &(thr->p_nextlock);
    thr = (pthread_descr)((long)(thr->p_nextlock) & ~1L);
  }
  /* Remove max prio thread from waiting list. */
  if (maxptr == (pthread_descr *) &lock->__status) {
    /* If max prio thread is at head, remove it with compare-and-swap
       to guard against concurrent lock operation. This removal
       also has the side effect of marking the lock as released
       because the new status comes from thr->p_nextlock whose
       least significant bit is clear. */
    thr = (pthread_descr) (oldstatus & ~1L);
    if (! __compare_and_swap_with_release_semantics
            (&lock->__status, oldstatus, (long)(thr->p_nextlock) & ~1L))
      goto again;
  } else {
    /* No risk of concurrent access, remove max prio thread normally.
       But in this case we must also flip the least significant bit
       of the status to mark the lock as released. */
    thr = (pthread_descr)((long)*maxptr & ~1L);
    *maxptr = thr->p_nextlock;

    /* Ensure deletion from linked list completes before we
       release the lock. */
    WRITE_MEMORY_BARRIER();

    do {
      oldstatus = lock->__status;
    } while (!__compare_and_swap_with_release_semantics(&lock->__status,
             oldstatus, oldstatus & ~1L));
  }
  /* Wake up the selected waiting thread. Woken thread can check
     its own p_nextlock field for NULL to detect that it has been removed. No
     barrier is needed here, since restart() and suspend() take
     care of memory synchronization. */

  thr->p_nextlock = NULL;
  restart(thr);

  return 0;
#endif
}
/*
 * Alternate fastlocks do not queue threads directly. Instead, they queue
 * these wait queue node structures. When a timed wait wakes up due to
 * a timeout, it can leave its wait node in the queue (because there
 * is no safe way to remove from the queue). Some other thread will
 * deallocate the abandoned node.
 */

struct wait_node {
  struct wait_node *next;   /* Next node in null terminated linked list */
  pthread_descr thr;        /* The thread waiting with this node */
  int abandoned;            /* Atomic flag */
};

static long wait_node_free_list;
static int wait_node_free_list_spinlock;
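
/* For orientation only: ownership of a wait node is handed over with an
   atomic test-and-set on its `abandoned` flag.  Whichever side sets the
   flag first wins the node -- a timed-out waiter that wins abandons the
   node in the queue for the unlocker to reclaim, while an unlocker that
   wins knows the waiter has not timed out and may dequeue the node and
   restart that thread.  Roughly:

       if (!testandset(&node->abandoned))
         ... we were first; the other side must not touch the node ...
*/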
/* Allocate a new node from the head of the free list using an atomic
   operation, or else using malloc if that list is empty.  A fundamental
   assumption here is that we can safely access wait_node_free_list->next.
   That's because we never free nodes once we allocate them, so a pointer to a
   node remains valid indefinitely. */
static struct wait_node *wait_node_alloc(void)
{
    struct wait_node *new_node = 0;

    __pthread_acquire(&wait_node_free_list_spinlock);
    if (wait_node_free_list != 0) {
      new_node = (struct wait_node *) wait_node_free_list;
      wait_node_free_list = (long) new_node->next;
    }
    WRITE_MEMORY_BARRIER();
    __pthread_release(&wait_node_free_list_spinlock);

    if (new_node == 0)
      /* sizeof does not evaluate its operand; this just sizes one node. */
      return malloc(sizeof *wait_node_alloc());

    return new_node;
}
/* Return a node to the head of the free list using an atomic
   operation. */

static void wait_node_free(struct wait_node *wn)
{
    __pthread_acquire(&wait_node_free_list_spinlock);
    wn->next = (struct wait_node *) wait_node_free_list;
    wait_node_free_list = (long) wn;
    WRITE_MEMORY_BARRIER();
    __pthread_release(&wait_node_free_list_spinlock);
}
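
/* Illustrative pairing, not part of the implementation: a timed waiter
   allocates a node, and either it or a later unlocker eventually returns
   the node to the free list -- never to the system allocator:

       struct wait_node *wn = wait_node_alloc();  // from free list or malloc()
       ...
       wait_node_free(wn);                        // back onto the free list
*/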
#if defined HAS_COMPARE_AND_SWAP

/* Remove a wait node from the specified queue.  It is assumed
   that the removal takes place concurrently with only atomic insertions at the
   head of the queue. */

static void wait_node_dequeue(struct wait_node **pp_head,
                              struct wait_node **pp_node,
                              struct wait_node *p_node)
{
  /* If the node is being deleted from the head of the
     list, it must be deleted using atomic compare-and-swap.
     Otherwise it can be deleted in the straightforward way. */

  if (pp_node == pp_head) {
    /* We don't need a read barrier between these next two loads,
       because it is assumed that the caller has already ensured
       the stability of *p_node with respect to p_node. */

    long oldvalue = (long) p_node;
    long newvalue = (long) p_node->next;

    if (__compare_and_swap((long *) pp_node, oldvalue, newvalue))
      return;

    /* Oops! Compare and swap failed, which means the node is
       no longer first. We delete it using the ordinary method.  But we don't
       know the identity of the node which now holds the pointer to the node
       being deleted, so we must search from the beginning. */

    for (pp_node = pp_head; p_node != *pp_node; ) {
      pp_node = &(*pp_node)->next;
      READ_MEMORY_BARRIER(); /* Stabilize *pp_node for next iteration. */
    }
  }

  *pp_node = p_node->next;
}

#endif
void __pthread_alt_lock(struct _pthread_fastlock * lock,
                        pthread_descr self)
{
#if defined HAS_COMPARE_AND_SWAP
  long oldstatus, newstatus;
#endif
  struct wait_node wait_node;

#if defined TEST_FOR_COMPARE_AND_SWAP
  if (!__pthread_has_cas)
#endif
#if !defined HAS_COMPARE_AND_SWAP || defined TEST_FOR_COMPARE_AND_SWAP
  {
    int suspend_needed = 0;
    __pthread_acquire(&lock->__spinlock);

    if (lock->__status == 0)
      lock->__status = 1;
    else {
      if (self == NULL)
        self = thread_self();

      wait_node.abandoned = 0;
      wait_node.next = (struct wait_node *) lock->__status;
      wait_node.thr = self;
      lock->__status = (long) &wait_node;
      suspend_needed = 1;
    }

    __pthread_release(&lock->__spinlock);

    if (suspend_needed)
      suspend (self);
    return;
  }
#endif
#if defined HAS_COMPARE_AND_SWAP
  do {
    oldstatus = lock->__status;
    if (oldstatus == 0) {
      newstatus = 1;
    } else {
      if (self == NULL)
        self = thread_self();
      wait_node.thr = self;
      newstatus = (long) &wait_node;
    }
    wait_node.abandoned = 0;
    wait_node.next = (struct wait_node *) oldstatus;
    /* Make sure the store in wait_node.next completes before performing
       the compare-and-swap */
    MEMORY_BARRIER();
  } while (! __compare_and_swap(&lock->__status, oldstatus, newstatus));

  /* Suspend. Note that unlike in __pthread_lock, we don't worry
     here about spurious wakeup. That's because this lock is not
     used in situations where that can happen; the restart can
     only come from the previous lock owner. */

  if (oldstatus != 0)
    suspend(self);

  READ_MEMORY_BARRIER();
#endif
}
/* Timed-out lock operation; returns 0 to indicate timeout. */

int __pthread_alt_timedlock(struct _pthread_fastlock * lock,
                            pthread_descr self, const struct timespec *abstime)
{
  long oldstatus = 0;
#if defined HAS_COMPARE_AND_SWAP
  long newstatus;
#endif
  struct wait_node *p_wait_node = wait_node_alloc();

  /* Out of memory, just give up and do ordinary lock. */
  if (p_wait_node == 0) {
    __pthread_alt_lock(lock, self);
    return 1;
  }
#if defined TEST_FOR_COMPARE_AND_SWAP
  if (!__pthread_has_cas)
#endif
#if !defined HAS_COMPARE_AND_SWAP || defined TEST_FOR_COMPARE_AND_SWAP
  {
    __pthread_acquire(&lock->__spinlock);

    if (lock->__status == 0)
      lock->__status = 1;
    else {
      if (self == NULL)
        self = thread_self();

      p_wait_node->abandoned = 0;
      p_wait_node->next = (struct wait_node *) lock->__status;
      p_wait_node->thr = self;
      lock->__status = (long) p_wait_node;
      oldstatus = 1; /* force suspend */
    }

    __pthread_release(&lock->__spinlock);
    goto suspend;
  }
#endif
#if defined HAS_COMPARE_AND_SWAP
  do {
    oldstatus = lock->__status;
    if (oldstatus == 0) {
      newstatus = 1;
    } else {
      if (self == NULL)
        self = thread_self();
      p_wait_node->thr = self;
      newstatus = (long) p_wait_node;
    }
    p_wait_node->abandoned = 0;
    p_wait_node->next = (struct wait_node *) oldstatus;
    /* Make sure the store in wait_node.next completes before performing
       the compare-and-swap */
    MEMORY_BARRIER();
  } while (! __compare_and_swap(&lock->__status, oldstatus, newstatus));
#endif

#if !defined HAS_COMPARE_AND_SWAP || defined TEST_FOR_COMPARE_AND_SWAP
  suspend:
#endif
  /* If we did not get the lock, do a timed suspend. If we wake up due
     to a timeout, then there is a race; the old lock owner may try
     to remove us from the queue. This race is resolved by us and the owner
     doing an atomic testandset() to change the state of the wait node from 0
     to 1. If we succeed, then it's a timeout and we abandon the node in the
     queue. If we fail, it means the owner gave us the lock. */

  if (oldstatus != 0) {
    if (timedsuspend(self, abstime) == 0) {
      if (!testandset(&p_wait_node->abandoned))
        return 0; /* Timeout! */

      /* Eat outstanding resume from owner, otherwise wait_node_free() below
         will race with owner's wait_node_dequeue(). */
      suspend(self);
    }
  }

  wait_node_free(p_wait_node);

  READ_MEMORY_BARRIER();

  return 1; /* Got the lock! */
}
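
/* Sketch of typical use, for orientation only (the caller shown here is
   hypothetical): pthread_mutex_timedlock()-style code passes an absolute
   deadline and treats a zero return as a timeout.

       struct timespec abstime = { .tv_sec = deadline_sec, .tv_nsec = 0 };
       if (__pthread_alt_timedlock(&mutex->__m_lock, thread_self(), &abstime) == 0)
         return ETIMEDOUT;
*/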
void __pthread_alt_unlock(struct _pthread_fastlock *lock)
{
  struct wait_node *p_node, **pp_node, *p_max_prio, **pp_max_prio;
  struct wait_node ** const pp_head = (struct wait_node **) &lock->__status;
  int maxprio;

  WRITE_MEMORY_BARRIER();

#if defined TEST_FOR_COMPARE_AND_SWAP
  if (!__pthread_has_cas)
#endif
#if !defined HAS_COMPARE_AND_SWAP || defined TEST_FOR_COMPARE_AND_SWAP
  {
    __pthread_acquire(&lock->__spinlock);
  }
#endif

  while (1) {
    /* If no threads are waiting for this lock, try to just
       atomically release it. */
#if defined TEST_FOR_COMPARE_AND_SWAP
    if (!__pthread_has_cas)
#endif
#if !defined HAS_COMPARE_AND_SWAP || defined TEST_FOR_COMPARE_AND_SWAP
    {
      if (lock->__status == 0 || lock->__status == 1) {
        lock->__status = 0;
        break;
      }
    }
#endif

#if defined TEST_FOR_COMPARE_AND_SWAP
    else
#endif

#if defined HAS_COMPARE_AND_SWAP
    {
      long oldstatus = lock->__status;
      if (oldstatus == 0 || oldstatus == 1) {
        if (__compare_and_swap_with_release_semantics (&lock->__status, oldstatus, 0))
          break;
        else
          continue;
      }
    }
#endif
    /* Process the entire queue of wait nodes. Remove all abandoned
       wait nodes and put them into the global free queue, and
       remember the one unabandoned node which refers to the thread
       having the highest priority. */

    pp_max_prio = pp_node = pp_head;
    p_max_prio = p_node = *pp_head;
    maxprio = INT_MIN;

    READ_MEMORY_BARRIER(); /* Prevent access to stale data through p_node */
    while (p_node != (struct wait_node *) 1) {
      int prio;

      if (p_node->abandoned) {
        /* Remove abandoned node. */
#if defined TEST_FOR_COMPARE_AND_SWAP
        if (!__pthread_has_cas)
#endif
#if !defined HAS_COMPARE_AND_SWAP || defined TEST_FOR_COMPARE_AND_SWAP
          *pp_node = p_node->next;
#endif
#if defined TEST_FOR_COMPARE_AND_SWAP
        else
#endif
#if defined HAS_COMPARE_AND_SWAP
          wait_node_dequeue(pp_head, pp_node, p_node);
#endif
        wait_node_free(p_node);
        /* Note that the next assignment may take us to the beginning
           of the queue, to newly inserted nodes, if pp_node == pp_head.
           In that case we need a memory barrier to stabilize the first of
           these new nodes. */
        p_node = *pp_node;
        if (pp_node == pp_head)
          READ_MEMORY_BARRIER(); /* No stale reads through p_node */
        continue;
      } else if ((prio = p_node->thr->p_priority) >= maxprio) {
        /* Otherwise remember it if its thread has a higher or equal priority
           compared to that of any node seen thus far. */
        maxprio = prio;
        pp_max_prio = pp_node;
        p_max_prio = p_node;
      }

      /* This cannot jump backward in the list, so no further read
         barrier is needed. */
      pp_node = &p_node->next;
      p_node = *pp_node;
    }
    /* If all threads abandoned, go back to top */
    if (maxprio == INT_MIN)
      continue;

    ASSERT (p_max_prio != (struct wait_node *) 1);

    /* Now we want to remove the max priority thread's wait node from
       the list. Before we can do this, we must atomically try to change the
       node's abandon state from zero to nonzero. If we succeed, that means we
       have the node that we will wake up. If we failed, then it means the
       thread timed out and abandoned the node in which case we repeat the
       whole unlock operation. */
    if (!testandset(&p_max_prio->abandoned)) {
#if defined TEST_FOR_COMPARE_AND_SWAP
      if (!__pthread_has_cas)
#endif
#if !defined HAS_COMPARE_AND_SWAP || defined TEST_FOR_COMPARE_AND_SWAP
        *pp_max_prio = p_max_prio->next;
#endif
#if defined TEST_FOR_COMPARE_AND_SWAP
      else
#endif
#if defined HAS_COMPARE_AND_SWAP
        wait_node_dequeue(pp_head, pp_max_prio, p_max_prio);
#endif

      /* Release the spinlock *before* restarting. */
#if defined TEST_FOR_COMPARE_AND_SWAP
      if (!__pthread_has_cas)
#endif
#if !defined HAS_COMPARE_AND_SWAP || defined TEST_FOR_COMPARE_AND_SWAP
      {
        __pthread_release(&lock->__spinlock);
      }
#endif

      restart(p_max_prio->thr);

      return;
    }
  }
#if defined TEST_FOR_COMPARE_AND_SWAP
  if (!__pthread_has_cas)
#endif
#if !defined HAS_COMPARE_AND_SWAP || defined TEST_FOR_COMPARE_AND_SWAP
  {
    __pthread_release(&lock->__spinlock);
  }
#endif
}
/* Compare-and-swap emulation with a spinlock */

#ifdef TEST_FOR_COMPARE_AND_SWAP
int __pthread_has_cas = 0;
#endif

#if !defined HAS_COMPARE_AND_SWAP || defined TEST_FOR_COMPARE_AND_SWAP

int __pthread_compare_and_swap(long * ptr, long oldval, long newval,
                               int * spinlock)
{
  int res;

  __pthread_acquire(spinlock);

  if (*ptr == oldval) {
    *ptr = newval; res = 1;
  } else {
    res = 0;
  }

  __pthread_release(spinlock);

  return res;
}

#endif
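
/* For orientation only: on targets without a hardware compare-and-swap,
   the generic compare_and_swap() wrapper in spinlock.h is expected to fall
   back to this emulation (the exact wiring is not shown in this file).
   Conceptually:

       static int cas_spinlock;
       long word = 0;
       int swapped = __pthread_compare_and_swap(&word, 0, 1, &cas_spinlock);
       // on success: swapped == 1 and word == 1
*/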
/* The retry strategy is as follows:
   - We test and set the spinlock MAX_SPIN_COUNT times, calling
     sched_yield() each time.  This gives ample opportunity for other
     threads with priority >= our priority to make progress and
     release the spinlock.
   - If a thread with priority < our priority owns the spinlock,
     calling sched_yield() repeatedly is useless, since we're preventing
     the owning thread from making progress and releasing the spinlock.
     So, after MAX_SPIN_COUNT attempts, we suspend the calling thread
     using nanosleep().  This again should give time to the owning thread
     for releasing the spinlock.
     Notice that the nanosleep() interval must not be too small,
     since the kernel does busy-waiting for short intervals in a realtime
     process (!).  The smallest duration that guarantees thread
     suspension is currently 2ms.
   - When nanosleep() returns, we try again, doing MAX_SPIN_COUNT
     sched_yield(), then sleeping again if needed. */
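
/* For scale: with the stock linuxthreads defaults from internals.h
   (MAX_SPIN_COUNT == 50, SPIN_SLEEP_DURATION == 2000001 ns), a contended
   __pthread_acquire() yields up to 50 times, then sleeps roughly 2 ms, and
   repeats until testandset() succeeds.  The values may differ in this L4
   port. */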
static void __pthread_acquire(int * spinlock)
{
  int cnt = 0;

  READ_MEMORY_BARRIER();

  while (testandset(spinlock)) {
    if (cnt < MAX_SPIN_COUNT) {
      /* Yield so the spinlock holder can run.  The exact yield call used by
         this L4 port is not preserved here; l4_thread_yield() from
         <l4/sys/thread.h> is assumed. */
      l4_thread_yield();
      cnt++;
    } else {
      l4_usleep(SPIN_SLEEP_DURATION / 1000);