/*
 * RT-Mutexes: simple blocking mutual exclusion locks with PI support
 *
 * started by Ingo Molnar and Thomas Gleixner.
 *
 *  Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
 *  Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
 *  Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
 *  Copyright (C) 2006 Esben Nielsen
 *
 *  See Documentation/rt-mutex-design.txt for details.
 */
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/timer.h>

#include "rtmutex_common.h"

#ifdef CONFIG_DEBUG_RT_MUTEXES
# include "rtmutex-debug.h"
#else
# include "rtmutex.h"
#endif

/*
 * lock->owner state tracking:
 *
 * lock->owner holds the task_struct pointer of the owner. Bits 0 and 1
 * are used to keep track of the "owner is pending" and "lock has
 * waiters" state.
 *
 * owner        bit1    bit0
 * NULL         0       0       lock is free (fast acquire possible)
 * NULL         0       1       invalid state
 * NULL         1       0       Transitional state*
 * NULL         1       1       invalid state
 * taskpointer  0       0       lock is held (fast release possible)
 * taskpointer  0       1       task is pending owner
 * taskpointer  1       0       lock is held and has waiters
 * taskpointer  1       1       task is pending owner and lock has more waiters
 *
 * Pending ownership is assigned to the top (highest priority)
 * waiter of the lock, when the lock is released. The thread is woken
 * up and can now take the lock. Until the lock is taken (bit 0
 * cleared) a competing higher priority thread can steal the lock,
 * which puts the woken up thread back on the waiters list.
 *
 * The fast atomic compare exchange based acquire and release is only
 * possible when bit 0 and 1 of lock->owner are 0.
 *
 * (*) There is a small window of time where the owner can be NULL and
 * the "lock has waiters" bit is set.  This can happen when grabbing the
 * lock. To prevent a cmpxchg of the owner releasing the lock, we need
 * to set this bit before looking at the lock, hence the reason this is
 * a transitional state.
 */
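
/*
 * Decoding sketch (illustrative only): the helpers in rtmutex_common.h
 * recover the pieces of the encoded owner word roughly like this,
 * assuming RT_MUTEX_OWNER_PENDING is bit 0 and RT_MUTEX_HAS_WAITERS
 * is bit 1:
 *
 *      owner task:   (struct task_struct *)((unsigned long)lock->owner
 *                      & ~(RT_MUTEX_OWNER_PENDING | RT_MUTEX_HAS_WAITERS))
 *      pending:      (unsigned long)lock->owner & RT_MUTEX_OWNER_PENDING
 *      has waiters:  (unsigned long)lock->owner & RT_MUTEX_HAS_WAITERS
 *
 * See rt_mutex_owner() and rt_mutex_owner_pending() for the real
 * implementations.
 */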

void
rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner,
                   unsigned long mask)
{
        unsigned long val = (unsigned long)owner | mask;

        if (rt_mutex_has_waiters(lock))
                val |= RT_MUTEX_HAS_WAITERS;

        lock->owner = (struct task_struct *)val;
}

static inline void clear_rt_mutex_waiters(struct rt_mutex *lock)
{
        lock->owner = (struct task_struct *)
                        ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
}

static void fixup_rt_mutex_waiters(struct rt_mutex *lock)
{
        if (!rt_mutex_has_waiters(lock))
                clear_rt_mutex_waiters(lock);
}

/*
 * Calculate task priority from the waiter list priority
 *
 * Return task->normal_prio when the waiter list is empty or when
 * the waiter is not allowed to do priority boosting
 */
int rt_mutex_getprio(struct task_struct *task)
{
        if (likely(!task_has_pi_waiters(task)))
                return task->normal_prio;

        return min(task_top_pi_waiter(task)->pi_list_entry.prio,
                   task->normal_prio);
}
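
/*
 * Worked example (kernel prio values, where a smaller number means a
 * higher priority): a SCHED_NORMAL task with normal_prio 120 whose top
 * pi waiter has prio 98 gets 98 from the min() above and thus runs
 * boosted; once that waiter goes away it drops back to 120.
 */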

/*
 * Adjust the priority of a task after its pi_waiters list has been
 * modified.
 *
 * This can be both boosting and unboosting. task->pi_lock must be held.
 */
void __rt_mutex_adjust_prio(struct task_struct *task)
{
        int prio = rt_mutex_getprio(task);

        if (task->prio != prio)
                rt_mutex_setprio(task, prio);
}

/*
 * Adjust task priority (undo boosting). Called from the exit path of
 * rt_mutex_slowunlock() and rt_mutex_slowlock().
 *
 * (Note: We do this outside of the protection of lock->wait_lock to
 * allow the lock to be taken while or before we readjust the priority
 * of the task. We do not use the spin_xx_mutex() variants here as we
 * are outside of the debug path.)
 */
static void rt_mutex_adjust_prio(struct task_struct *task)
{
        unsigned long flags;

        spin_lock_irqsave(&task->pi_lock, flags);
        __rt_mutex_adjust_prio(task);
        spin_unlock_irqrestore(&task->pi_lock, flags);
}

/*
 * Max number of times we'll walk the boosting chain:
 */
int max_lock_depth = 1024;

/*
 * Adjust the priority chain. Also used for deadlock detection.
 * Decreases task's usage count by one - may thus free the task.
 * Returns 0 or -EDEADLK.
 */
int rt_mutex_adjust_prio_chain(struct task_struct *task,
                               int deadlock_detect,
                               struct rt_mutex *orig_lock,
                               struct rt_mutex_waiter *orig_waiter,
                               struct task_struct *top_task)
{
        struct rt_mutex *lock;
        struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
        int detect_deadlock, ret = 0, depth = 0;
        unsigned long flags;

        detect_deadlock = debug_rt_mutex_detect_deadlock(orig_waiter,
                                                         deadlock_detect);

        /*
         * The (de)boosting is a step by step approach with a lot of
         * pitfalls. We want this to be preemptible and we want to hold
         * a maximum of two locks per step. So we have to check
         * carefully whether things change under us.
         */
 again:
        if (++depth > max_lock_depth) {
                static int prev_max;

                /*
                 * Print this only once. If the admin changes the limit,
                 * print a new message when reaching the limit again.
                 */
                if (prev_max != max_lock_depth) {
                        prev_max = max_lock_depth;
                        printk(KERN_WARNING "Maximum lock depth %d reached "
                               "task: %s (%d)\n", max_lock_depth,
                               top_task->comm, top_task->pid);
                }
                put_task_struct(task);

                return deadlock_detect ? -EDEADLK : 0;
        }
 retry:
        /*
         * The task can not go away as we did a get_task_struct()
         * before!
         */
        spin_lock_irqsave(&task->pi_lock, flags);

        waiter = task->pi_blocked_on;
        /*
         * Check whether the end of the boosting chain has been
         * reached or the state of the chain has changed while we
         * dropped the locks.
         */
        if (!waiter || !waiter->task)
                goto out_unlock_pi;

        if (top_waiter && (!task_has_pi_waiters(task) ||
                           top_waiter != task_top_pi_waiter(task)))
                goto out_unlock_pi;

        /*
         * When deadlock detection is off then we check whether further
         * priority adjustment is necessary.
         */
        if (!detect_deadlock && waiter->list_entry.prio == task->prio)
                goto out_unlock_pi;

        lock = waiter->lock;
        if (!spin_trylock(&lock->wait_lock)) {
                spin_unlock_irqrestore(&task->pi_lock, flags);
                cpu_relax();
                goto retry;
        }

        /* Deadlock detection */
        if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
                debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock);
                spin_unlock(&lock->wait_lock);
                ret = deadlock_detect ? -EDEADLK : 0;
                goto out_unlock_pi;
        }

        top_waiter = rt_mutex_top_waiter(lock);

        /* Requeue the waiter */
        plist_del(&waiter->list_entry, &lock->wait_list);
        waiter->list_entry.prio = task->prio;
        plist_add(&waiter->list_entry, &lock->wait_list);

        /* Release the task */
        spin_unlock_irqrestore(&task->pi_lock, flags);
        put_task_struct(task);

        /* Grab the next task */
        task = rt_mutex_owner(lock);
        get_task_struct(task);
        spin_lock_irqsave(&task->pi_lock, flags);

        if (waiter == rt_mutex_top_waiter(lock)) {
                /* Boost the owner */
                plist_del(&top_waiter->pi_list_entry, &task->pi_waiters);
                waiter->pi_list_entry.prio = waiter->list_entry.prio;
                plist_add(&waiter->pi_list_entry, &task->pi_waiters);
                __rt_mutex_adjust_prio(task);

        } else if (top_waiter == waiter) {
                /* Deboost the owner */
                plist_del(&waiter->pi_list_entry, &task->pi_waiters);
                waiter = rt_mutex_top_waiter(lock);
                waiter->pi_list_entry.prio = waiter->list_entry.prio;
                plist_add(&waiter->pi_list_entry, &task->pi_waiters);
                __rt_mutex_adjust_prio(task);
        }

        spin_unlock_irqrestore(&task->pi_lock, flags);

        top_waiter = rt_mutex_top_waiter(lock);
        spin_unlock(&lock->wait_lock);

        if (!detect_deadlock && waiter != top_waiter)
                goto out_put_task;

        goto again;

 out_unlock_pi:
        spin_unlock_irqrestore(&task->pi_lock, flags);
 out_put_task:
        put_task_struct(task);

        return ret;
}
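
/*
 * Chain walk example (illustrative; task and lock names are made up):
 * if task A blocks on lock L1 whose owner B is itself blocked on lock
 * L2 owned by C, then task_blocks_on_rt_mutex() for L1 ends up calling
 * rt_mutex_adjust_prio_chain(B, ...). One pass of the loop above boosts
 * B via L1's top waiter, then follows B->pi_blocked_on to L2 and
 * repeats with owner C, until an owner that is not blocked, a priority
 * that needs no adjustment, or max_lock_depth terminates the walk.
 */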

/*
 * Optimization: check if we can steal the lock from the
 * assigned pending owner [which might not have taken the
 * lock yet]:
 */
static inline int try_to_steal_lock(struct rt_mutex *lock)
{
        struct task_struct *pendowner = rt_mutex_owner(lock);
        struct rt_mutex_waiter *next;
        unsigned long flags;

        if (!rt_mutex_owner_pending(lock))
                return 0;

        if (pendowner == current)
                return 1;

        spin_lock_irqsave(&pendowner->pi_lock, flags);
        if (current->prio >= pendowner->prio) {
                spin_unlock_irqrestore(&pendowner->pi_lock, flags);
                return 0;
        }

        /*
         * Check if a waiter is enqueued on the pending owner's
         * pi_waiters list. Remove it and readjust the pending owner's
         * priority.
         */
        if (likely(!rt_mutex_has_waiters(lock))) {
                spin_unlock_irqrestore(&pendowner->pi_lock, flags);
                return 1;
        }

        /* No chain handling, the pending owner is not blocked on anything: */
        next = rt_mutex_top_waiter(lock);
        plist_del(&next->pi_list_entry, &pendowner->pi_waiters);
        __rt_mutex_adjust_prio(pendowner);
        spin_unlock_irqrestore(&pendowner->pi_lock, flags);

        /*
         * We are going to steal the lock and a waiter was
         * enqueued on the pending owner's pi_waiters queue. So
         * we have to enqueue this waiter into the
         * current->pi_waiters list. This covers the case where
         * current is boosted because it holds another lock and
         * gets unboosted because the booster is interrupted, so
         * we would otherwise delay a waiter with a higher
         * priority than current->normal_prio.
         *
         * Note: in the rare case of a SCHED_OTHER task changing
         * its priority and thus stealing the lock, next->task
         * might be current:
         */
        if (likely(next->task != current)) {
                spin_lock_irqsave(&current->pi_lock, flags);
                plist_add(&next->pi_list_entry, &current->pi_waiters);
                __rt_mutex_adjust_prio(current);
                spin_unlock_irqrestore(&current->pi_lock, flags);
        }
        return 1;
}
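
/*
 * Priority comparison example (smaller prio value means higher
 * priority): with the pending owner at prio 90, a task at prio 89
 * steals the lock above, while tasks at prio 90 or 120 fail the
 * "current->prio >= pendowner->prio" check and remain (or go back to
 * sleep as) waiters.
 */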

/*
 * Try to take an rt-mutex
 *
 * This fails
 * - when the lock has a real owner
 * - when a different pending owner exists and has higher priority than current
 *
 * Must be called with lock->wait_lock held.
 */
static int try_to_take_rt_mutex(struct rt_mutex *lock)
{
        /*
         * We have to be careful here if the atomic speedups are
         * enabled, such that, when
         *  - no other waiter is on the lock
         *  - the lock has been released since we did the cmpxchg
         * the lock can be released or taken while we are doing the
         * checks and marking the lock with RT_MUTEX_HAS_WAITERS.
         *
         * The atomic acquire/release aware variant of
         * mark_rt_mutex_waiters uses a cmpxchg loop. After setting
         * the WAITERS bit, the atomic release / acquire can not
         * happen anymore and lock->wait_lock protects us from the
         * non-atomic case.
         *
         * Note that this might set lock->owner =
         * RT_MUTEX_HAS_WAITERS in the case the lock is not contended
         * any more. This is fixed up when we take the ownership.
         * This is the transitional state explained at the top of this file.
         */
        mark_rt_mutex_waiters(lock);

        if (rt_mutex_owner(lock) && !try_to_steal_lock(lock))
                return 0;

        /* We got the lock. */
        debug_rt_mutex_lock(lock);

        rt_mutex_set_owner(lock, current, 0);

        rt_mutex_deadlock_account_lock(lock, current);

        return 1;
}

/*
 * Task blocks on lock.
 *
 * Prepare waiter and propagate pi chain
 *
 * This must be called with lock->wait_lock held.
 */
static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
                                   struct rt_mutex_waiter *waiter,
                                   int detect_deadlock)
{
        struct task_struct *owner = rt_mutex_owner(lock);
        struct rt_mutex_waiter *top_waiter = waiter;
        unsigned long flags;
        int chain_walk = 0, res;

        spin_lock_irqsave(&current->pi_lock, flags);
        __rt_mutex_adjust_prio(current);
        waiter->task = current;
        waiter->lock = lock;
        plist_node_init(&waiter->list_entry, current->prio);
        plist_node_init(&waiter->pi_list_entry, current->prio);

        /* Get the top priority waiter on the lock */
        if (rt_mutex_has_waiters(lock))
                top_waiter = rt_mutex_top_waiter(lock);
        plist_add(&waiter->list_entry, &lock->wait_list);

        current->pi_blocked_on = waiter;

        spin_unlock_irqrestore(&current->pi_lock, flags);

        if (waiter == rt_mutex_top_waiter(lock)) {
                spin_lock_irqsave(&owner->pi_lock, flags);
                plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters);
                plist_add(&waiter->pi_list_entry, &owner->pi_waiters);

                __rt_mutex_adjust_prio(owner);
                if (owner->pi_blocked_on)
                        chain_walk = 1;
                spin_unlock_irqrestore(&owner->pi_lock, flags);
        }
        else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock))
                chain_walk = 1;

        if (!chain_walk)
                return 0;

        /*
         * The owner can't disappear while holding a lock,
         * so the owner struct is protected by wait_lock.
         * The reference gets dropped in rt_mutex_adjust_prio_chain()!
         */
        get_task_struct(owner);

        spin_unlock(&lock->wait_lock);

        res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter,
                                         current);

        spin_lock(&lock->wait_lock);

        return res;
}

/*
 * Wake up the next waiter on the lock.
 *
 * Remove the top waiter from the current task's waiter list and from
 * the lock waiter list. Set it as pending owner. Then wake it up.
 *
 * Called with lock->wait_lock held.
 */
static void wakeup_next_waiter(struct rt_mutex *lock)
{
        struct rt_mutex_waiter *waiter;
        struct task_struct *pendowner;
        unsigned long flags;

        spin_lock_irqsave(&current->pi_lock, flags);

        waiter = rt_mutex_top_waiter(lock);
        plist_del(&waiter->list_entry, &lock->wait_list);

        /*
         * Remove it from current->pi_waiters. We do not adjust a
         * possible priority boost right now. We execute wakeup in the
         * boosted mode and go back to normal after releasing
         * lock->wait_lock.
         */
        plist_del(&waiter->pi_list_entry, &current->pi_waiters);
        pendowner = waiter->task;
        waiter->task = NULL;

        rt_mutex_set_owner(lock, pendowner, RT_MUTEX_OWNER_PENDING);

        spin_unlock_irqrestore(&current->pi_lock, flags);

        /*
         * Clear the pi_blocked_on variable and enqueue a possible
         * waiter into the pi_waiters list of the pending owner. This
         * prevents the situation where the pending owner gets unboosted
         * while a waiter with a priority higher than
         * pending-owner->normal_prio is still blocked on it.
         */
        spin_lock_irqsave(&pendowner->pi_lock, flags);

        WARN_ON(!pendowner->pi_blocked_on);
        WARN_ON(pendowner->pi_blocked_on != waiter);
        WARN_ON(pendowner->pi_blocked_on->lock != lock);

        pendowner->pi_blocked_on = NULL;

        if (rt_mutex_has_waiters(lock)) {
                struct rt_mutex_waiter *next;

                next = rt_mutex_top_waiter(lock);
                plist_add(&next->pi_list_entry, &pendowner->pi_waiters);
        }
        spin_unlock_irqrestore(&pendowner->pi_lock, flags);

        wake_up_process(pendowner);
}

/*
 * Remove a waiter from a lock
 *
 * Must be called with lock->wait_lock held
 */
void remove_waiter(struct rt_mutex *lock,
                   struct rt_mutex_waiter *waiter)
{
        int first = (waiter == rt_mutex_top_waiter(lock));
        struct task_struct *owner = rt_mutex_owner(lock);
        unsigned long flags;
        int chain_walk = 0;

        spin_lock_irqsave(&current->pi_lock, flags);
        plist_del(&waiter->list_entry, &lock->wait_list);
        waiter->task = NULL;
        current->pi_blocked_on = NULL;
        spin_unlock_irqrestore(&current->pi_lock, flags);

        if (first && owner != current) {

                spin_lock_irqsave(&owner->pi_lock, flags);

                plist_del(&waiter->pi_list_entry, &owner->pi_waiters);

                if (rt_mutex_has_waiters(lock)) {
                        struct rt_mutex_waiter *next;

                        next = rt_mutex_top_waiter(lock);
                        plist_add(&next->pi_list_entry, &owner->pi_waiters);
                }
                __rt_mutex_adjust_prio(owner);

                if (owner->pi_blocked_on)
                        chain_walk = 1;

                spin_unlock_irqrestore(&owner->pi_lock, flags);
        }

        WARN_ON(!plist_node_empty(&waiter->pi_list_entry));

        if (!chain_walk)
                return;

        /* gets dropped in rt_mutex_adjust_prio_chain()! */
        get_task_struct(owner);

        spin_unlock(&lock->wait_lock);

        rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current);

        spin_lock(&lock->wait_lock);
}

/*
 * Recheck the pi chain, in case we got a priority change
 *
 * Called from sched_setscheduler
 */
void rt_mutex_adjust_pi(struct task_struct *task)
{
        struct rt_mutex_waiter *waiter;
        unsigned long flags;

        spin_lock_irqsave(&task->pi_lock, flags);

        waiter = task->pi_blocked_on;
        if (!waiter || waiter->list_entry.prio == task->prio) {
                spin_unlock_irqrestore(&task->pi_lock, flags);
                return;
        }

        spin_unlock_irqrestore(&task->pi_lock, flags);

        /* gets dropped in rt_mutex_adjust_prio_chain()! */
        get_task_struct(task);
        rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task);
}

/*
 * Slow path lock function:
 */
static int __sched
rt_mutex_slowlock(struct rt_mutex *lock, int state,
                  struct hrtimer_sleeper *timeout,
                  int detect_deadlock)
{
        struct rt_mutex_waiter waiter;
        int ret = 0;

        debug_rt_mutex_init_waiter(&waiter);
        waiter.task = NULL;

        spin_lock(&lock->wait_lock);

        /* Try to acquire the lock again: */
        if (try_to_take_rt_mutex(lock)) {
                spin_unlock(&lock->wait_lock);
                return 0;
        }

        set_current_state(state);

        /* Set up the timer, when timeout != NULL */
        if (unlikely(timeout))
                hrtimer_start(&timeout->timer, timeout->timer.expires,
                              HRTIMER_MODE_ABS);

        for (;;) {
                /* Try to acquire the lock: */
                if (try_to_take_rt_mutex(lock))
                        break;

                /*
                 * TASK_INTERRUPTIBLE checks for signals and
                 * timeout. Ignored otherwise.
                 */
                if (unlikely(state == TASK_INTERRUPTIBLE)) {
                        /* Signal pending? */
                        if (signal_pending(current))
                                ret = -EINTR;
                        if (timeout && !timeout->task)
                                ret = -ETIMEDOUT;
                        if (ret)
                                break;
                }

                /*
                 * waiter.task is NULL the first time we come here and
                 * when we have been woken up by the previous owner
                 * but the lock got stolen by a higher prio task.
                 */
                if (!waiter.task) {
                        ret = task_blocks_on_rt_mutex(lock, &waiter,
                                                      detect_deadlock);
                        /*
                         * If we got woken up by the owner then start the
                         * loop all over without going into schedule() to
                         * try to get the lock now:
                         */
                        if (unlikely(!waiter.task))
                                continue;

                        if (unlikely(ret))
                                break;
                }

                spin_unlock(&lock->wait_lock);

                debug_rt_mutex_print_deadlock(&waiter);

                if (waiter.task)
                        schedule_rt_mutex(lock);

                spin_lock(&lock->wait_lock);
                set_current_state(state);
        }

        set_current_state(TASK_RUNNING);

        if (unlikely(waiter.task))
                remove_waiter(lock, &waiter);

        /*
         * try_to_take_rt_mutex() sets the waiter bit
         * unconditionally. We might have to fix that up.
         */
        fixup_rt_mutex_waiters(lock);

        spin_unlock(&lock->wait_lock);

        /* Remove pending timer: */
        if (unlikely(timeout))
                hrtimer_cancel(&timeout->timer);

        /*
         * Readjust priority, when we did not get the lock. We might
         * have been the pending owner and boosted. Since we did not
         * take the lock, the PI boost has to go.
         */
        if (unlikely(ret))
                rt_mutex_adjust_prio(current);

        debug_rt_mutex_free_waiter(&waiter);

        return ret;
}

/*
 * Slow path try-lock function:
 */
static inline int
rt_mutex_slowtrylock(struct rt_mutex *lock)
{
        int ret = 0;

        spin_lock(&lock->wait_lock);

        if (likely(rt_mutex_owner(lock) != current)) {

                ret = try_to_take_rt_mutex(lock);
                /*
                 * try_to_take_rt_mutex() sets the lock waiters
                 * bit unconditionally. Clean this up.
                 */
                fixup_rt_mutex_waiters(lock);
        }

        spin_unlock(&lock->wait_lock);

        return ret;
}

/*
 * Slow path to release a rt-mutex:
 */
static void __sched
rt_mutex_slowunlock(struct rt_mutex *lock)
{
        spin_lock(&lock->wait_lock);

        debug_rt_mutex_unlock(lock);

        rt_mutex_deadlock_account_unlock(current);

        if (!rt_mutex_has_waiters(lock)) {
                lock->owner = NULL;
                spin_unlock(&lock->wait_lock);
                return;
        }

        wakeup_next_waiter(lock);

        spin_unlock(&lock->wait_lock);

        /* Undo pi boosting if necessary: */
        rt_mutex_adjust_prio(current);
}

/*
 * debug aware fast / slowpath lock, trylock, unlock
 *
 * The atomic acquire/release ops are compiled away when either the
 * architecture does not support cmpxchg or when debugging is enabled.
 */
static inline int
rt_mutex_fastlock(struct rt_mutex *lock, int state,
                  int detect_deadlock,
                  int (*slowfn)(struct rt_mutex *lock, int state,
                                struct hrtimer_sleeper *timeout,
                                int detect_deadlock))
{
        if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) {
                rt_mutex_deadlock_account_lock(lock, current);
                return 0;
        } else
                return slowfn(lock, state, NULL, detect_deadlock);
}

static inline int
rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
                        struct hrtimer_sleeper *timeout, int detect_deadlock,
                        int (*slowfn)(struct rt_mutex *lock, int state,
                                      struct hrtimer_sleeper *timeout,
                                      int detect_deadlock))
{
        if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) {
                rt_mutex_deadlock_account_lock(lock, current);
                return 0;
        } else
                return slowfn(lock, state, timeout, detect_deadlock);
}

static inline int
rt_mutex_fasttrylock(struct rt_mutex *lock,
                     int (*slowfn)(struct rt_mutex *lock))
{
        if (likely(rt_mutex_cmpxchg(lock, NULL, current))) {
                rt_mutex_deadlock_account_lock(lock, current);
                return 1;
        }
        return slowfn(lock);
}

static inline void
rt_mutex_fastunlock(struct rt_mutex *lock,
                    void (*slowfn)(struct rt_mutex *lock))
{
        if (likely(rt_mutex_cmpxchg(lock, current, NULL)))
                rt_mutex_deadlock_account_unlock(current);
        else
                slowfn(lock);
}

/**
 * rt_mutex_lock - lock a rt_mutex
 *
 * @lock: the rt_mutex to be locked
 */
void __sched rt_mutex_lock(struct rt_mutex *lock)
{
        might_sleep();

        rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, 0, rt_mutex_slowlock);
}
EXPORT_SYMBOL_GPL(rt_mutex_lock);
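
/*
 * Usage sketch (illustrative only; my_lock and my_count are made-up
 * names, using the DEFINE_RT_MUTEX() initializer from <linux/rtmutex.h>).
 * rt_mutex_lock() may sleep, so it must only be called from process
 * context with no spinlocks held:
 *
 *      static DEFINE_RT_MUTEX(my_lock);
 *      static int my_count;
 *
 *      static void my_count_inc(void)
 *      {
 *              rt_mutex_lock(&my_lock);
 *              my_count++;
 *              rt_mutex_unlock(&my_lock);
 *      }
 *
 * rt_mutex_trylock() can be used instead when blocking is not allowed.
 */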

/**
 * rt_mutex_lock_interruptible - lock a rt_mutex interruptible
 *
 * @lock:               the rt_mutex to be locked
 * @detect_deadlock:    deadlock detection on/off
 *
 * Returns:
 *  0           on success
 * -EINTR       when interrupted by a signal
 * -EDEADLK     when the lock would deadlock (when deadlock detection is on)
 */
int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock,
                                        int detect_deadlock)
{
        might_sleep();

        return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE,
                                 detect_deadlock, rt_mutex_slowlock);
}
EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);

/**
 * rt_mutex_timed_lock - lock a rt_mutex interruptible; the timeout
 *                       structure is provided by the caller
 *
 * @lock:               the rt_mutex to be locked
 * @timeout:            timeout structure or NULL (no timeout)
 * @detect_deadlock:    deadlock detection on/off
 *
 * Returns:
 *  0           on success
 * -EINTR       when interrupted by a signal
 * -ETIMEDOUT   when the timeout expired
 * -EDEADLK     when the lock would deadlock (when deadlock detection is on)
 */
int
rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout,
                    int detect_deadlock)
{
        might_sleep();

        return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
                                       detect_deadlock, rt_mutex_slowlock);
}
EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
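
/*
 * Timed-lock usage sketch (illustrative only; my_lock is a made-up
 * rt_mutex, and the hrtimer setup mirrors the way the PI-futex code
 * drives this interface). The timeout is an absolute expiry on a
 * caller-provided hrtimer_sleeper:
 *
 *      struct hrtimer_sleeper to;
 *      int ret;
 *
 *      hrtimer_init(&to.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
 *      hrtimer_init_sleeper(&to, current);
 *      to.timer.expires = ktime_add(ktime_get(), ktime_set(1, 0));
 *
 *      ret = rt_mutex_timed_lock(&my_lock, &to, 0);
 *
 * A return value of -ETIMEDOUT here means the lock was not acquired
 * within one second.
 */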

/**
 * rt_mutex_trylock - try to lock a rt_mutex
 *
 * @lock:       the rt_mutex to be locked
 *
 * Returns 1 on success and 0 on contention
 */
int __sched rt_mutex_trylock(struct rt_mutex *lock)
{
        return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock);
}
EXPORT_SYMBOL_GPL(rt_mutex_trylock);

/**
 * rt_mutex_unlock - unlock a rt_mutex
 *
 * @lock: the rt_mutex to be unlocked
 */
void __sched rt_mutex_unlock(struct rt_mutex *lock)
{
        rt_mutex_fastunlock(lock, rt_mutex_slowunlock);
}
EXPORT_SYMBOL_GPL(rt_mutex_unlock);

/**
 * rt_mutex_destroy - mark a mutex unusable
 * @lock: the mutex to be destroyed
 *
 * This function marks the mutex uninitialized, and any subsequent
 * use of the mutex is forbidden. The mutex must not be locked when
 * this function is called.
 */
void rt_mutex_destroy(struct rt_mutex *lock)
{
        WARN_ON(rt_mutex_is_locked(lock));
#ifdef CONFIG_DEBUG_RT_MUTEXES
        lock->magic = NULL;
#endif
}
EXPORT_SYMBOL_GPL(rt_mutex_destroy);

/**
 * __rt_mutex_init - initialize the rt lock
 *
 * @lock: the rt lock to be initialized
 *
 * Initialize the rt lock to unlocked state.
 *
 * Initializing a locked rt lock is not allowed
 */
void __rt_mutex_init(struct rt_mutex *lock, const char *name)
{
        lock->owner = NULL;
        spin_lock_init(&lock->wait_lock);
        plist_head_init(&lock->wait_list, &lock->wait_lock);

        debug_rt_mutex_init(lock, name);
}
EXPORT_SYMBOL_GPL(__rt_mutex_init);
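
/*
 * Initialization sketch (illustrative; my_lock is a made-up name).
 * Runtime users normally go through the rt_mutex_init() wrapper from
 * <linux/rtmutex.h>, which supplies the lock name for the debug case,
 * or use the static DEFINE_RT_MUTEX() initializer shown earlier:
 *
 *      struct rt_mutex my_lock;
 *
 *      rt_mutex_init(&my_lock);
 *      rt_mutex_lock(&my_lock);
 *      rt_mutex_unlock(&my_lock);
 */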

/**
 * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a
 *                              proxy owner
 *
 * @lock:       the rt_mutex to be locked
 * @proxy_owner:        the task to set as owner
 *
 * No locking. Caller has to do serializing itself
 * Special API call for PI-futex support
 */
void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
                                struct task_struct *proxy_owner)
{
        __rt_mutex_init(lock, NULL);
        debug_rt_mutex_proxy_lock(lock, proxy_owner);
        rt_mutex_set_owner(lock, proxy_owner, 0);
        rt_mutex_deadlock_account_lock(lock, proxy_owner);
}

/**
 * rt_mutex_proxy_unlock - release a lock on behalf of owner
 *
 * @lock:       the rt_mutex to be unlocked
 * @proxy_owner:        the task that had been set as owner
 *
 * No locking. Caller has to do serializing itself
 * Special API call for PI-futex support
 */
void rt_mutex_proxy_unlock(struct rt_mutex *lock,
                           struct task_struct *proxy_owner)
{
        debug_rt_mutex_proxy_unlock(lock);
        rt_mutex_set_owner(lock, NULL, 0);
        rt_mutex_deadlock_account_unlock(proxy_owner);
}

/**
 * rt_mutex_next_owner - return the next owner of the lock
 *
 * @lock: the rt lock to query
 *
 * Returns the next owner of the lock or NULL
 *
 * Caller has to serialize against other accessors to the lock
 * itself.
 *
 * Special API call for PI-futex support
 */
struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock)
{
        if (!rt_mutex_has_waiters(lock))
                return NULL;

        return rt_mutex_top_waiter(lock)->task;
}