kernel/rtmutex.c

   1 /*
   2  * RT-Mutexes: simple blocking mutual exclusion locks with PI support
   3  *
   4  * started by Ingo Molnar and Thomas Gleixner.
   5  *
   6  *  Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
   7  *  Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
   8  *  Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
   9  *  Copyright (C) 2006 Esben Nielsen
  10  *
  11  *  See Documentation/rt-mutex-design.txt for details.
  12  */
  13 #include <linux/spinlock.h>
  14 #include <linux/export.h>
  15 #include <linux/sched.h>
  16 #include <linux/sched/rt.h>
  17 #include <linux/timer.h>
  18
  19 #include "rtmutex_common.h"
  20
  21 /*
  22  * lock->owner state tracking:
  23  *
  24  * lock->owner holds the task_struct pointer of the owner. Bit 0
  25  * is used to keep track of the "lock has waiters" state.
  26  *
  27  * owner        bit0
  28  * NULL         0       lock is free (fast acquire possible)
  29  * NULL         1       lock is free and has waiters and the top waiter
  30  *                              is going to take the lock*
  31  * taskpointer  0       lock is held (fast release possible)
  32  * taskpointer  1       lock is held and has waiters**
  33  *
  34  * The fast atomic compare exchange based acquire and release is only
  35  * possible when bit 0 of lock->owner is 0.
  36  *
   37  * (*) It can also be a transitional state while grabbing the lock
   38  * with ->wait_lock held. To prevent any fast path cmpxchg on the lock,
   39  * we need to set bit 0 before looking at the lock, and the owner may be
   40  * NULL during this short window, hence this can be a transitional state.
  41  *
  42  * (**) There is a small time when bit 0 is set but there are no
  43  * waiters. This can happen when grabbing the lock in the slow path.
  44  * To prevent a cmpxchg of the owner releasing the lock, we need to
  45  * set this bit before looking at the lock.
  46  */
  47
  48 static void
  49 rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner)
  50 {
  51         unsigned long val = (unsigned long)owner;
  52
  53         if (rt_mutex_has_waiters(lock))
  54                 val |= RT_MUTEX_HAS_WAITERS;
  55
  56         lock->owner = (struct task_struct *)val;
  57 }
  58
  59 static inline void clear_rt_mutex_waiters(struct rt_mutex *lock)
  60 {
  61         lock->owner = (struct task_struct *)
  62                         ((unsigned long)lock->owner & ~RT_MUTEX_HAS_WAITERS);
  63 }
  64
  65 static void fixup_rt_mutex_waiters(struct rt_mutex *lock)
  66 {
  67         if (!rt_mutex_has_waiters(lock))
  68                 clear_rt_mutex_waiters(lock);
  69 }
  70
  71 /*
  72  * We can speed up the acquire/release, if the architecture
  73  * supports cmpxchg and if there's no debugging state to be set up
  74  */
  75 #if defined(__HAVE_ARCH_CMPXCHG) && !defined(CONFIG_DEBUG_RT_MUTEXES)
  76 # define rt_mutex_cmpxchg(l,c,n)        (cmpxchg(&l->owner, c, n) == c)
  77 static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
  78 {
  79         unsigned long owner, *p = (unsigned long *) &lock->owner;
  80
  81         do {
  82                 owner = *p;
  83         } while (cmpxchg(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner);
  84 }
  85
  86 /*
  87  * Safe fastpath aware unlock:
  88  * 1) Clear the waiters bit
  89  * 2) Drop lock->wait_lock
  90  * 3) Try to unlock the lock with cmpxchg
  91  */
  92 static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
  93         __releases(lock->wait_lock)
  94 {
  95         struct task_struct *owner = rt_mutex_owner(lock);
  96
  97         clear_rt_mutex_waiters(lock);
  98         raw_spin_unlock(&lock->wait_lock);
  99         /*
 100          * If a new waiter comes in between the unlock and the cmpxchg
 101          * we have two situations:
 102          *
 103          * unlock(wait_lock);
 104          *                                      lock(wait_lock);
 105          * cmpxchg(p, owner, 0) == owner
 106          *                                      mark_rt_mutex_waiters(lock);
 107          *                                      acquire(lock);
 108          * or:
 109          *
 110          * unlock(wait_lock);
 111          *                                      lock(wait_lock);
 112          *                                      mark_rt_mutex_waiters(lock);
 113          *
 114          * cmpxchg(p, owner, 0) != owner
 115          *                                      enqueue_waiter();
 116          *                                      unlock(wait_lock);
 117          * lock(wait_lock);
 118          * wake waiter();
 119          * unlock(wait_lock);
 120          *                                      lock(wait_lock);
 121          *                                      acquire(lock);
 122          */
 123         return rt_mutex_cmpxchg(lock, owner, NULL);
 124 }
 125
 126 #else
 127 # define rt_mutex_cmpxchg(l,c,n)        (0)
 128 static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
 129 {
 130         lock->owner = (struct task_struct *)
 131                         ((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
 132 }
 133
 134 /*
 135  * Simple slow path only version: lock->owner is protected by lock->wait_lock.
 136  */
 137 static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
 138         __releases(lock->wait_lock)
 139 {
 140         lock->owner = NULL;
 141         raw_spin_unlock(&lock->wait_lock);
 142         return true;
 143 }
 144 #endif
 145
 146 /*
 147  * Calculate task priority from the waiter list priority
 148  *
 149  * Return task->normal_prio when the waiter list is empty or when
 150  * the waiter is not allowed to do priority boosting
 151  */
 152 int rt_mutex_getprio(struct task_struct *task)
 153 {
 154         if (likely(!task_has_pi_waiters(task)))
 155                 return task->normal_prio;
 156
 157         return min(task_top_pi_waiter(task)->pi_list_entry.prio,
 158                    task->normal_prio);
 159 }
 160
 161 /*
 162  * Adjust the priority of a task, after its pi_waiters got modified.
 163  *
 164  * This can be both boosting and unboosting. task->pi_lock must be held.
 165  */
 166 static void __rt_mutex_adjust_prio(struct task_struct *task)
 167 {
 168         int prio = rt_mutex_getprio(task);
 169
 170         if (task->prio != prio)
 171                 rt_mutex_setprio(task, prio);
 172 }
 173
 174 /*
 175  * Adjust task priority (undo boosting). Called from the exit path of
 176  * rt_mutex_slowunlock() and rt_mutex_slowlock().
 177  *
 178  * (Note: We do this outside of the protection of lock->wait_lock to
 179  * allow the lock to be taken while or before we readjust the priority
 180  * of task. We do not use the spin_xx_mutex() variants here as we are
 181  * outside of the debug path.)
 182  */
 183 static void rt_mutex_adjust_prio(struct task_struct *task)
 184 {
 185         unsigned long flags;
 186
 187         raw_spin_lock_irqsave(&task->pi_lock, flags);
 188         __rt_mutex_adjust_prio(task);
 189         raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 190 }
 191
  192 /*
  193  * Max number of times we'll walk the boosting chain:
       *
       * rt_mutex_adjust_prio_chain() counts one step per lock it visits; once
       * the count exceeds this limit the walk is abandoned with -EDEADLK and a
       * warning is printed (once per value of the limit).
  194  */
  195 int max_lock_depth = 1024;
 196
 197 static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p)
 198 {
 199         return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
 200 }
 201
  202 /*
  203  * Adjust the priority chain. Also used for deadlock detection.
  204  * Decreases task's usage by one - may thus free the task.
  205  * Returns 0 or -EDEADLK.
       *
       * @task:            task to start the walk at; the caller must hold a
       *                   reference on it, which is dropped on every exit path
       * @deadlock_detect: passed on to debug_rt_mutex_detect_deadlock() to decide
       *                   whether the walk runs in deadlock detection mode
       * @orig_lock:       lock which caused the walk; meeting it again in the
       *                   chain is reported as a deadlock
       * @next_lock:       lock @task was blocked on when the caller looked; the
       *                   walk stops if @task is blocked on something else now
       * @orig_waiter:     waiter that started the walk, or NULL when deboosting
       * @top_task:        task which initiated the walk; a lock in the chain
       *                   owned by it is reported as a deadlock
       *
       * -EDEADLK is also returned when more than max_lock_depth locks have
       * been visited.
  206  */
  207 static int rt_mutex_adjust_prio_chain(struct task_struct *task,
  208                                       int deadlock_detect,
  209                                       struct rt_mutex *orig_lock,
  210                                       struct rt_mutex *next_lock,
  211                                       struct rt_mutex_waiter *orig_waiter,
  212                                       struct task_struct *top_task)
  213 {
  214         struct rt_mutex *lock;
  215         struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
  216         int detect_deadlock, ret = 0, depth = 0;
  217         unsigned long flags;
  218
  219         detect_deadlock = debug_rt_mutex_detect_deadlock(orig_waiter,
  220                                                          deadlock_detect);
  221
  222         /*
  223          * The (de)boosting is a step by step approach with a lot of
  224          * pitfalls. We want this to be preemptible and we want to hold a
  225          * maximum of two locks per step. So we have to check
  226          * carefully whether things change under us.
  227          */
  228  again:
  229         if (++depth > max_lock_depth) {
  230                 static int prev_max;
  231
  232                 /*
  233                  * Print this only once. If the admin changes the limit,
  234                  * print a new message when reaching the limit again.
  235                  */
  236                 if (prev_max != max_lock_depth) {
  237                         prev_max = max_lock_depth;
  238                         printk(KERN_WARNING "Maximum lock depth %d reached "
  239                                "task: %s (%d)\n", max_lock_depth,
  240                                top_task->comm, task_pid_nr(top_task));
  241                 }
                      /* No locks are held here; just drop the reference on @task. */
  242                 put_task_struct(task);
  243
  244                 return -EDEADLK;
  245         }
  246  retry:
  247         /*
  248          * Task can not go away as we did a get_task() before !
  249          */
  250         raw_spin_lock_irqsave(&task->pi_lock, flags);
  251
  252         waiter = task->pi_blocked_on;
  253         /*
  254          * Check whether the end of the boosting chain has been
  255          * reached or the state of the chain has changed while we
  256          * dropped the locks.
  257          */
  258         if (!waiter)
  259                 goto out_unlock_pi;
  260
  261         /*
  262          * Check the orig_waiter state. After we dropped the locks,
  263          * the previous owner of the lock might have released the lock.
  264          */
  265         if (orig_waiter && !rt_mutex_owner(orig_lock))
  266                 goto out_unlock_pi;
  267
  268         /*
  269          * We dropped all locks after taking a refcount on @task, so
  270          * the task might have moved on in the lock chain or even left
  271          * the chain completely and blocks now on an unrelated lock or
  272          * on @orig_lock.
  273          *
  274          * We stored the lock on which @task was blocked in @next_lock,
  275          * so we can detect the chain change.
  276          */
  277         if (next_lock != waiter->lock)
  278                 goto out_unlock_pi;
  279
  280         /*
  281          * Drop out, when the task has no waiters. Note,
  282          * top_waiter can be NULL, when we are in the deboosting
  283          * mode!
  284          */
  285         if (top_waiter) {
  286                 if (!task_has_pi_waiters(task))
  287                         goto out_unlock_pi;
  288                 /*
  289                  * If deadlock detection is off, we stop here if we
  290                  * are not the top pi waiter of the task.
  291                  */
  292                 if (!detect_deadlock && top_waiter != task_top_pi_waiter(task))
  293                         goto out_unlock_pi;
  294         }
  295
  296         /*
  297          * When deadlock detection is off then we check, if further
  298          * priority adjustment is necessary.
  299          */
  300         if (!detect_deadlock && waiter->list_entry.prio == task->prio)
  301                 goto out_unlock_pi;
  302
              /*
               * task->pi_lock is held at this point, so lock->wait_lock is
               * only try-locked; on contention pi_lock is dropped and the
               * whole step is retried from the top.
               */
  303         lock = waiter->lock;
  304         if (!raw_spin_trylock(&lock->wait_lock)) {
  305                 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  306                 cpu_relax();
  307                 goto retry;
  308         }
  309
  310         /*
  311          * Deadlock detection. If the lock is the same as the original
  312          * lock which caused us to walk the lock chain or if the
  313          * current lock is owned by the task which initiated the chain
  314          * walk, we detected a deadlock.
  315          */
  316         if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
  317                 debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock);
  318                 raw_spin_unlock(&lock->wait_lock);
  319                 ret = -EDEADLK;
  320                 goto out_unlock_pi;
  321         }
  322
              /* Remember the top waiter of the lock before the requeue below. */
  323         top_waiter = rt_mutex_top_waiter(lock);
  324
  325         /* Requeue the waiter */
  326         plist_del(&waiter->list_entry, &lock->wait_list);
  327         waiter->list_entry.prio = task->prio;
  328         plist_add(&waiter->list_entry, &lock->wait_list);
  329
  330         /* Release the task */
  331         raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  332         if (!rt_mutex_owner(lock)) {
  333                 /*
  334                  * If the requeue above changed the top waiter, then we need
  335                  * to wake the new top waiter up to try to get the lock.
  336                  */
  337
  338                 if (top_waiter != rt_mutex_top_waiter(lock))
  339                         wake_up_process(rt_mutex_top_waiter(lock)->task);
  340                 raw_spin_unlock(&lock->wait_lock);
  341                 goto out_put_task;
  342         }
              /* Done with the blocked task; the walk continues with the lock owner. */
  343         put_task_struct(task);
  344
  345         /* Grab the next task */
  346         task = rt_mutex_owner(lock);
  347         get_task_struct(task);
  348         raw_spin_lock_irqsave(&task->pi_lock, flags);
  349
  350         if (waiter == rt_mutex_top_waiter(lock)) {
  351                 /* Boost the owner */
  352                 plist_del(&top_waiter->pi_list_entry, &task->pi_waiters);
  353                 waiter->pi_list_entry.prio = waiter->list_entry.prio;
  354                 plist_add(&waiter->pi_list_entry, &task->pi_waiters);
  355                 __rt_mutex_adjust_prio(task);
  356
  357         } else if (top_waiter == waiter) {
  358                 /* Deboost the owner */
  359                 plist_del(&waiter->pi_list_entry, &task->pi_waiters);
  360                 waiter = rt_mutex_top_waiter(lock);
  361                 waiter->pi_list_entry.prio = waiter->list_entry.prio;
  362                 plist_add(&waiter->pi_list_entry, &task->pi_waiters);
  363                 __rt_mutex_adjust_prio(task);
  364         }
  365
  366         /*
  367          * Check whether the task which owns the current lock is pi
  368          * blocked itself. If yes we store a pointer to the lock for
  369          * the lock chain change detection above. After we dropped
  370          * task->pi_lock next_lock cannot be dereferenced anymore.
  371          */
  372         next_lock = task_blocked_on_lock(task);
  373
  374         raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  375
  376         top_waiter = rt_mutex_top_waiter(lock);
  377         raw_spin_unlock(&lock->wait_lock);
  378
  379         /*
  380          * We reached the end of the lock chain. Stop right here. No
  381          * point to go back just to figure that out.
  382          */
  383         if (!next_lock)
  384                 goto out_put_task;
  385
              /*
               * Without deadlock detection the walk only continues while the
               * waiter handled in this step is the top waiter of the lock.
               */
  386         if (!detect_deadlock && waiter != top_waiter)
  387                 goto out_put_task;
  388
  389         goto again;
  390
  391  out_unlock_pi:
  392         raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  393  out_put_task:
  394         put_task_struct(task);
  395
  396         return ret;
  397 }
 398
 399 /*
 400  * Try to take an rt-mutex
 401  *
 402  * Must be called with lock->wait_lock held.
 403  *
 404  * @lock:   the lock to be acquired.
 405  * @task:   the task which wants to acquire the lock
 406  * @waiter: the waiter that is queued to the lock's wait list. (could be NULL)
 407  */
 408 static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
 409                 struct rt_mutex_waiter *waiter)
 410 {
 411         /*
 412          * We have to be careful here if the atomic speedups are
 413          * enabled, such that, when
 414          *  - no other waiter is on the lock
 415          *  - the lock has been released since we did the cmpxchg
 416          * the lock can be released or taken while we are doing the
 417          * checks and marking the lock with RT_MUTEX_HAS_WAITERS.
 418          *
 419          * The atomic acquire/release aware variant of
 420          * mark_rt_mutex_waiters uses a cmpxchg loop. After setting
 421          * the WAITERS bit, the atomic release / acquire can not
 422          * happen anymore and lock->wait_lock protects us from the
 423          * non-atomic case.
 424          *
 425          * Note, that this might set lock->owner =
 426          * RT_MUTEX_HAS_WAITERS in the case the lock is not contended
 427          * any more. This is fixed up when we take the ownership.
 428          * This is the transitional state explained at the top of this file.
 429          */
 430         mark_rt_mutex_waiters(lock);
 431
 432         if (rt_mutex_owner(lock))
 433                 return 0;
 434
 435         /*
 436          * It will get the lock because of one of these conditions:
 437          * 1) there is no waiter
 438          * 2) higher priority than waiters
 439          * 3) it is top waiter
 440          */
 441         if (rt_mutex_has_waiters(lock)) {
 442                 if (task->prio >= rt_mutex_top_waiter(lock)->list_entry.prio) {
 443                         if (!waiter || waiter != rt_mutex_top_waiter(lock))
 444                                 return 0;
 445                 }
 446         }
 447
 448         if (waiter || rt_mutex_has_waiters(lock)) {
 449                 unsigned long flags;
 450                 struct rt_mutex_waiter *top;
 451
 452                 raw_spin_lock_irqsave(&task->pi_lock, flags);
 453
 454                 /* remove the queued waiter. */
 455                 if (waiter) {
 456                         plist_del(&waiter->list_entry, &lock->wait_list);
 457                         task->pi_blocked_on = NULL;
 458                 }
 459
 460                 /*
 461                  * We have to enqueue the top waiter(if it exists) into
 462                  * task->pi_waiters list.
 463                  */
 464                 if (rt_mutex_has_waiters(lock)) {
 465                         top = rt_mutex_top_waiter(lock);
 466                         top->pi_list_entry.prio = top->list_entry.prio;
 467                         plist_add(&top->pi_list_entry, &task->pi_waiters);
 468                 }
 469                 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 470         }
 471
 472         /* We got the lock. */
 473         debug_rt_mutex_lock(lock);
 474
 475         rt_mutex_set_owner(lock, task);
 476
 477         rt_mutex_deadlock_account_lock(lock, task);
 478
 479         return 1;
 480 }
 481
  482 /*
  483  * Task blocks on lock.
  484  *
  485  * Prepare waiter and propagate pi chain
  486  *
  487  * This must be called with lock->wait_lock held.
       *
       * @lock:            the lock @task is about to block on
       * @waiter:          waiter to set up and queue on @lock's wait list
       * @task:            the blocking task
       * @detect_deadlock: passed on to the deadlock detection / chain walk
       *
       * Returns 0 on success. Returns -EDEADLK when @task already owns @lock
       * (in which case @waiter is neither initialized nor queued), or the
       * result of rt_mutex_adjust_prio_chain() when a chain walk was needed.
       * lock->wait_lock is dropped and re-taken around the chain walk.
  488  */
  489 static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
  490                                    struct rt_mutex_waiter *waiter,
  491                                    struct task_struct *task,
  492                                    int detect_deadlock)
  493 {
  494         struct task_struct *owner = rt_mutex_owner(lock);
  495         struct rt_mutex_waiter *top_waiter = waiter;
  496         struct rt_mutex *next_lock;
  497         int chain_walk = 0, res;
  498         unsigned long flags;
  499
  500         /*
  501          * Early deadlock detection. We really don't want the task to
  502          * enqueue on itself just to untangle the mess later. It's not
  503          * only an optimization. We drop the locks, so another waiter
  504          * can come in before the chain walk detects the deadlock. So
  505          * the other will detect the deadlock and return -EDEADLK,
  506          * which is wrong, as the other waiter is not in a deadlock
  507          * situation.
  508          */
  509         if (owner == task)
  510                 return -EDEADLK;
  511
              /* Set up the waiter with the task's current (re-evaluated) priority. */
  512         raw_spin_lock_irqsave(&task->pi_lock, flags);
  513         __rt_mutex_adjust_prio(task);
  514         waiter->task = task;
  515         waiter->lock = lock;
  516         plist_node_init(&waiter->list_entry, task->prio);
  517         plist_node_init(&waiter->pi_list_entry, task->prio);
  518
  519         /* Get the top priority waiter on the lock */
  520         if (rt_mutex_has_waiters(lock))
  521                 top_waiter = rt_mutex_top_waiter(lock);
  522         plist_add(&waiter->list_entry, &lock->wait_list);
  523
  524         task->pi_blocked_on = waiter;
  525
  526         raw_spin_unlock_irqrestore(&task->pi_lock, flags);
  527
              /* No owner means nobody to boost; the waiter simply stays queued. */
  528         if (!owner)
  529                 return 0;
  530
  531         raw_spin_lock_irqsave(&owner->pi_lock, flags);
  532         if (waiter == rt_mutex_top_waiter(lock)) {
                      /*
                       * @waiter became the top waiter: replace the previous top
                       * waiter (or @waiter itself, if the list was empty before)
                       * in owner->pi_waiters and re-evaluate the owner's priority.
                       */
  533                 plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters);
  534                 plist_add(&waiter->pi_list_entry, &owner->pi_waiters);
  535
  536                 __rt_mutex_adjust_prio(owner);
  537                 if (owner->pi_blocked_on)
  538                         chain_walk = 1;
  539         } else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock)) {
  540                 chain_walk = 1;
  541         }
  542
  543         /* Store the lock on which owner is blocked or NULL */
  544         next_lock = task_blocked_on_lock(owner);
  545
  546         raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
  547         /*
  548          * Even if full deadlock detection is on, if the owner is not
  549          * blocked itself, we can avoid finding this out in the chain
  550          * walk.
  551          */
  552         if (!chain_walk || !next_lock)
  553                 return 0;
  554
  555         /*
  556          * The owner can't disappear while holding a lock,
  557          * so the owner struct is protected by wait_lock.
  558          * Gets dropped in rt_mutex_adjust_prio_chain()!
  559          */
  560         get_task_struct(owner);
  561
  562         raw_spin_unlock(&lock->wait_lock);
  563
  564         res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock,
  565                                          next_lock, waiter, task);
  566
  567         raw_spin_lock(&lock->wait_lock);
  568
  569         return res;
  570 }
 571
 572 /*
 573  * Wake up the next waiter on the lock.
 574  *
 575  * Remove the top waiter from the current tasks pi waiter list and
 576  * wake it up.
 577  *
 578  * Called with lock->wait_lock held.
 579  */
 580 static void wakeup_next_waiter(struct rt_mutex *lock)
 581 {
 582         struct rt_mutex_waiter *waiter;
 583         unsigned long flags;
 584
 585         raw_spin_lock_irqsave(&current->pi_lock, flags);
 586
 587         waiter = rt_mutex_top_waiter(lock);
 588
 589         /*
 590          * Remove it from current->pi_waiters. We do not adjust a
 591          * possible priority boost right now. We execute wakeup in the
 592          * boosted mode and go back to normal after releasing
 593          * lock->wait_lock.
 594          */
 595         plist_del(&waiter->pi_list_entry, &current->pi_waiters);
 596
 597         /*
 598          * As we are waking up the top waiter, and the waiter stays
 599          * queued on the lock until it gets the lock, this lock
 600          * obviously has waiters. Just set the bit here and this has
 601          * the added benefit of forcing all new tasks into the
 602          * slow path making sure no task of lower priority than
 603          * the top waiter can steal this lock.
 604          */
 605         lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
 606
 607         raw_spin_unlock_irqrestore(&current->pi_lock, flags);
 608
 609         /*
 610          * It's safe to dereference waiter as it cannot go away as
 611          * long as we hold lock->wait_lock. The waiter task needs to
 612          * acquire it in order to dequeue the waiter.
 613          */
 614         wake_up_process(waiter->task);
 615 }
 616
  617 /*
  618  * Remove a waiter from a lock and give up
  619  *
  620  * Must be called with lock->wait_lock held and
  621  * have just failed to try_to_take_rt_mutex().
       *
       * @lock:   the lock current was waiting for
       * @waiter: current's waiter; it must be queued on @lock's wait list,
       *          as it is unconditionally deleted from that list
       *
       * If @waiter was the top waiter, the owner's pi_waiters list and
       * priority are updated, and if the owner is itself blocked the pi chain
       * is walked. lock->wait_lock is dropped and re-taken around that walk.
  622  */
  623 static void remove_waiter(struct rt_mutex *lock,
  624                           struct rt_mutex_waiter *waiter)
  625 {
              /* Sample top-waiter status and owner before the waiter is dequeued. */
  626         int first = (waiter == rt_mutex_top_waiter(lock));
  627         struct task_struct *owner = rt_mutex_owner(lock);
  628         struct rt_mutex *next_lock = NULL;
  629         unsigned long flags;
  630
  631         raw_spin_lock_irqsave(&current->pi_lock, flags);
  632         plist_del(&waiter->list_entry, &lock->wait_list);
  633         current->pi_blocked_on = NULL;
  634         raw_spin_unlock_irqrestore(&current->pi_lock, flags);
  635
              /* Without an owner there is no priority boost to take back. */
  636         if (!owner)
  637                 return;
  638
  639         if (first) {
  640
  641                 raw_spin_lock_irqsave(&owner->pi_lock, flags);
  642
                      /* Replace @waiter in owner->pi_waiters by the new top waiter, if any. */
  643                 plist_del(&waiter->pi_list_entry, &owner->pi_waiters);
  644
  645                 if (rt_mutex_has_waiters(lock)) {
  646                         struct rt_mutex_waiter *next;
  647
  648                         next = rt_mutex_top_waiter(lock);
  649                         plist_add(&next->pi_list_entry, &owner->pi_waiters);
  650                 }
  651                 __rt_mutex_adjust_prio(owner);
  652
  653                 /* Store the lock on which owner is blocked or NULL */
  654                 next_lock = task_blocked_on_lock(owner);
  655
  656                 raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
  657         }
  658
  659         WARN_ON(!plist_node_empty(&waiter->pi_list_entry));
  660
              /* next_lock is only set when @waiter was first and the owner is blocked. */
  661         if (!next_lock)
  662                 return;
  663
  664         /* gets dropped in rt_mutex_adjust_prio_chain()! */
  665         get_task_struct(owner);
  666
  667         raw_spin_unlock(&lock->wait_lock);
  668
  669         rt_mutex_adjust_prio_chain(owner, 0, lock, next_lock, NULL, current);
  670
  671         raw_spin_lock(&lock->wait_lock);
  672 }
 673
 674 /*
 675  * Recheck the pi chain, in case we got a priority setting
 676  *
 677  * Called from sched_setscheduler
 678  */
 679 void rt_mutex_adjust_pi(struct task_struct *task)
 680 {
 681         struct rt_mutex_waiter *waiter;
 682         struct rt_mutex *next_lock;
 683         unsigned long flags;
 684
 685         raw_spin_lock_irqsave(&task->pi_lock, flags);
 686
 687         waiter = task->pi_blocked_on;
 688         if (!waiter || waiter->list_entry.prio == task->prio) {
 689                 raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 690                 return;
 691         }
 692         next_lock = waiter->lock;
 693         raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 694
 695         /* gets dropped in rt_mutex_adjust_prio_chain()! */
 696         get_task_struct(task);
 697
 698         rt_mutex_adjust_prio_chain(task, 0, NULL, next_lock, NULL, task);
 699 }
 700
 701 /**
 702  * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
 703  * @lock:                the rt_mutex to take
 704  * @state:               the state the task should block in (TASK_INTERRUPTIBLE
 705  *                       or TASK_UNINTERRUPTIBLE)
 706  * @timeout:             the pre-initialized and started timer, or NULL for none
 707  * @waiter:              the pre-initialized rt_mutex_waiter
 708  *
 709  * lock->wait_lock must be held by the caller.
 710  */
 711 static int __sched
 712 __rt_mutex_slowlock(struct rt_mutex *lock, int state,
 713                     struct hrtimer_sleeper *timeout,
 714                     struct rt_mutex_waiter *waiter)
 715 {
 716         int ret = 0;
 717
 718         for (;;) {
 719                 /* Try to acquire the lock: */
 720                 if (try_to_take_rt_mutex(lock, current, waiter))
 721                         break;
 722
 723                 /*
 724                  * TASK_INTERRUPTIBLE checks for signals and
 725                  * timeout. Ignored otherwise.
 726                  */
 727                 if (unlikely(state == TASK_INTERRUPTIBLE)) {
 728                         /* Signal pending? */
 729                         if (signal_pending(current))
 730                                 ret = -EINTR;
 731                         if (timeout && !timeout->task)
 732                                 ret = -ETIMEDOUT;
 733                         if (ret)
 734                                 break;
 735                 }
 736
 737                 raw_spin_unlock(&lock->wait_lock);
 738
 739                 debug_rt_mutex_print_deadlock(waiter);
 740
 741                 schedule_rt_mutex(lock);
 742
 743                 raw_spin_lock(&lock->wait_lock);
 744                 set_current_state(state);
 745         }
 746
 747         return ret;
 748 }
 749
 750 static void rt_mutex_handle_deadlock(int res, int detect_deadlock,
 751                                      struct rt_mutex_waiter *w)
 752 {
 753         /*
 754          * If the result is not -EDEADLOCK or the caller requested
 755          * deadlock detection, nothing to do here.
 756          */
 757         if (res != -EDEADLOCK || detect_deadlock)
 758                 return;
 759
 760         /*
 761          * Yell lowdly and stop the task right here.
 762          */
 763         rt_mutex_print_deadlock(w);
 764         while (1) {
 765                 set_current_state(TASK_INTERRUPTIBLE);
 766                 schedule();
 767         }
 768 }
 769
 770 /*
 771  * Slow path lock function:
 772  */
 773 static int __sched
 774 rt_mutex_slowlock(struct rt_mutex *lock, int state,
 775                   struct hrtimer_sleeper *timeout,
 776                   int detect_deadlock)
 777 {
 778         struct rt_mutex_waiter waiter;
 779         int ret = 0;
 780
 781         debug_rt_mutex_init_waiter(&waiter);
 782
 783         raw_spin_lock(&lock->wait_lock);
 784
 785         /* Try to acquire the lock again: */
 786         if (try_to_take_rt_mutex(lock, current, NULL)) {
 787                 raw_spin_unlock(&lock->wait_lock);
 788                 return 0;
 789         }
 790
 791         set_current_state(state);
 792
 793         /* Setup the timer, when timeout != NULL */
 794         if (unlikely(timeout)) {
 795                 hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
 796                 if (!hrtimer_active(&timeout->timer))
 797                         timeout->task = NULL;
 798         }
 799
 800         ret = task_blocks_on_rt_mutex(lock, &waiter, current, detect_deadlock);
 801
 802         if (likely(!ret))
 803                 ret = __rt_mutex_slowlock(lock, state, timeout, &waiter);
 804
 805         set_current_state(TASK_RUNNING);
 806
 807         if (unlikely(ret)) {
 808                 remove_waiter(lock, &waiter);
 809                 rt_mutex_handle_deadlock(ret, detect_deadlock, &waiter);
 810         }
 811
 812         /*
 813          * try_to_take_rt_mutex() sets the waiter bit
 814          * unconditionally. We might have to fix that up.
 815          */
 816         fixup_rt_mutex_waiters(lock);
 817
 818         raw_spin_unlock(&lock->wait_lock);
 819
 820         /* Remove pending timer: */
 821         if (unlikely(timeout))
 822                 hrtimer_cancel(&timeout->timer);
 823
 824         debug_rt_mutex_free_waiter(&waiter);
 825
 826         return ret;
 827 }
 828
 829 /*
 830  * Slow path try-lock function:
 831  */
 832 static inline int
 833 rt_mutex_slowtrylock(struct rt_mutex *lock)
 834 {
 835         int ret = 0;
 836
 837         raw_spin_lock(&lock->wait_lock);
 838
 839         if (likely(rt_mutex_owner(lock) != current)) {
 840
 841                 ret = try_to_take_rt_mutex(lock, current, NULL);
 842                 /*
 843                  * try_to_take_rt_mutex() sets the lock waiters
 844                  * bit unconditionally. Clean this up.
 845                  */
 846                 fixup_rt_mutex_waiters(lock);
 847         }
 848
 849         raw_spin_unlock(&lock->wait_lock);
 850
 851         return ret;
 852 }
 853
 854 /*
 855  * Slow path to release a rt-mutex:
 856  */
 857 static void __sched
 858 rt_mutex_slowunlock(struct rt_mutex *lock)
 859 {
 860         raw_spin_lock(&lock->wait_lock);
 861
 862         debug_rt_mutex_unlock(lock);
 863
 864         rt_mutex_deadlock_account_unlock(current);
 865
 866         /*
 867          * We must be careful here if the fast path is enabled. If we
 868          * have no waiters queued we cannot set owner to NULL here
 869          * because of:
 870          *
 871          * foo->lock->owner = NULL;
 872          *                      rtmutex_lock(foo->lock);   <- fast path
 873          *                      free = atomic_dec_and_test(foo->refcnt);
 874          *                      rtmutex_unlock(foo->lock); <- fast path
 875          *                      if (free)
 876          *                              kfree(foo);
 877          * raw_spin_unlock(foo->lock->wait_lock);
 878          *
 879          * So for the fastpath enabled kernel:
 880          *
 881          * Nothing can set the waiters bit as long as we hold
 882          * lock->wait_lock. So we do the following sequence:
 883          *
 884          *      owner = rt_mutex_owner(lock);
 885          *      clear_rt_mutex_waiters(lock);
 886          *      raw_spin_unlock(&lock->wait_lock);
 887          *      if (cmpxchg(&lock->owner, owner, 0) == owner)
 888          *              return;
 889          *      goto retry;
 890          *
 891          * The fastpath disabled variant is simple as all access to
 892          * lock->owner is serialized by lock->wait_lock:
 893          *
 894          *      lock->owner = NULL;
 895          *      raw_spin_unlock(&lock->wait_lock);
 896          */
 897         while (!rt_mutex_has_waiters(lock)) {
 898                 /* Drops lock->wait_lock ! */
 899                 if (unlock_rt_mutex_safe(lock) == true)
 900                         return;
 901                 /* Relock the rtmutex and try again */
 902                 raw_spin_lock(&lock->wait_lock);
 903         }
 904
 905         /*
 906          * The wakeup next waiter path does not suffer from the above
 907          * race. See the comments there.
 908          */
 909         wakeup_next_waiter(lock);
 910
 911         raw_spin_unlock(&lock->wait_lock);
 912
 913         /* Undo pi boosting if necessary: */
 914         rt_mutex_adjust_prio(current);
 915 }
 916
 917 /*
 918  * debug aware fast / slowpath lock,trylock,unlock
 919  *
 920  * The atomic acquire/release ops are compiled away, when either the
 921  * architecture does not support cmpxchg or when debugging is enabled.
 922  */
 923 static inline int
 924 rt_mutex_fastlock(struct rt_mutex *lock, int state,
 925                   int detect_deadlock,
 926                   int (*slowfn)(struct rt_mutex *lock, int state,
 927                                 struct hrtimer_sleeper *timeout,
 928                                 int detect_deadlock))
 929 {
 930         if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) {
 931                 rt_mutex_deadlock_account_lock(lock, current);
 932                 return 0;
 933         } else
 934                 return slowfn(lock, state, NULL, detect_deadlock);
 935 }
 936
 937 static inline int
 938 rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
 939                         struct hrtimer_sleeper *timeout, int detect_deadlock,
 940                         int (*slowfn)(struct rt_mutex *lock, int state,
 941                                       struct hrtimer_sleeper *timeout,
 942                                       int detect_deadlock))
 943 {
 944         if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) {
 945                 rt_mutex_deadlock_account_lock(lock, current);
 946                 return 0;
 947         } else
 948                 return slowfn(lock, state, timeout, detect_deadlock);
 949 }
 950
 951 static inline int
 952 rt_mutex_fasttrylock(struct rt_mutex *lock,
 953                      int (*slowfn)(struct rt_mutex *lock))
 954 {
 955         if (likely(rt_mutex_cmpxchg(lock, NULL, current))) {
 956                 rt_mutex_deadlock_account_lock(lock, current);
 957                 return 1;
 958         }
 959         return slowfn(lock);
 960 }
 961
 962 static inline void
 963 rt_mutex_fastunlock(struct rt_mutex *lock,
 964                     void (*slowfn)(struct rt_mutex *lock))
 965 {
 966         if (likely(rt_mutex_cmpxchg(lock, current, NULL)))
 967                 rt_mutex_deadlock_account_unlock(current);
 968         else
 969                 slowfn(lock);
 970 }
 971
 972 /**
 973  * rt_mutex_lock - lock a rt_mutex
 974  *
 975  * @lock: the rt_mutex to be locked
 976  */
 977 void __sched rt_mutex_lock(struct rt_mutex *lock)
 978 {
 979         might_sleep();
 980
 981         rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, 0, rt_mutex_slowlock);
 982 }
 983 EXPORT_SYMBOL_GPL(rt_mutex_lock);
 984
 985 /**
 986  * rt_mutex_lock_interruptible - lock a rt_mutex interruptible
 987  *
 988  * @lock:               the rt_mutex to be locked
 989  * @detect_deadlock:    deadlock detection on/off
 990  *
 991  * Returns:
 992  *  0           on success
 993  * -EINTR       when interrupted by a signal
 994  * -EDEADLK     when the lock would deadlock (when deadlock detection is on)
 995  */
 996 int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock,
 997                                                  int detect_deadlock)
 998 {
 999         might_sleep();
1000
1001         return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE,
1002                                  detect_deadlock, rt_mutex_slowlock);
1003 }
1004 EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
1005
1006 /**
1007  * rt_mutex_timed_lock - lock a rt_mutex interruptible
1008  *                      the timeout structure is provided
1009  *                      by the caller
1010  *
1011  * @lock:               the rt_mutex to be locked
1012  * @timeout:            timeout structure or NULL (no timeout)
1013  * @detect_deadlock:    deadlock detection on/off
1014  *
1015  * Returns:
1016  *  0           on success
1017  * -EINTR       when interrupted by a signal
1018  * -ETIMEDOUT   when the timeout expired
1019  * -EDEADLK     when the lock would deadlock (when deadlock detection is on)
1020  */
1021 int
1022 rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout,
1023                     int detect_deadlock)
1024 {
1025         might_sleep();
1026
1027         return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
1028                                        detect_deadlock, rt_mutex_slowlock);
1029 }
1030 EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
1031
1032 /**
1033  * rt_mutex_trylock - try to lock a rt_mutex
1034  *
1035  * @lock:       the rt_mutex to be locked
1036  *
1037  * Returns 1 on success and 0 on contention
1038  */
1039 int __sched rt_mutex_trylock(struct rt_mutex *lock)
1040 {
1041         return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock);
1042 }
1043 EXPORT_SYMBOL_GPL(rt_mutex_trylock);
1044
1045 /**
1046  * rt_mutex_unlock - unlock a rt_mutex
1047  *
1048  * @lock: the rt_mutex to be unlocked
1049  */
1050 void __sched rt_mutex_unlock(struct rt_mutex *lock)
1051 {
1052         rt_mutex_fastunlock(lock, rt_mutex_slowunlock);
1053 }
1054 EXPORT_SYMBOL_GPL(rt_mutex_unlock);
1055
/**
 * rt_mutex_destroy - mark a mutex unusable
 * @lock: the mutex to be destroyed
 *
 * Marks the mutex as uninitialized; using it afterwards is forbidden.
 * The mutex must not be locked when this is called: a locked mutex
 * triggers a WARN_ON(). The magic field is only cleared when
 * CONFIG_DEBUG_RT_MUTEXES is enabled.
 */
void rt_mutex_destroy(struct rt_mutex *lock)
{
	WARN_ON(rt_mutex_is_locked(lock));
#ifdef CONFIG_DEBUG_RT_MUTEXES
	lock->magic = NULL;
#endif
}

EXPORT_SYMBOL_GPL(rt_mutex_destroy);
1073
1074 /**
1075  * __rt_mutex_init - initialize the rt lock
1076  *
1077  * @lock: the rt lock to be initialized
1078  *
1079  * Initialize the rt lock to unlocked state.
1080  *
1081  * Initializing of a locked rt lock is not allowed
1082  */
1083 void __rt_mutex_init(struct rt_mutex *lock, const char *name)
1084 {
1085         lock->owner = NULL;
1086         raw_spin_lock_init(&lock->wait_lock);
1087         plist_head_init(&lock->wait_list);
1088
1089         debug_rt_mutex_init(lock, name);
1090 }
1091 EXPORT_SYMBOL_GPL(__rt_mutex_init);
1092
1093 /**
1094  * rt_mutex_init_proxy_locked - initialize and lock a rt_mutex on behalf of a
1095  *                              proxy owner
1096  *
1097  * @lock:       the rt_mutex to be locked
1098  * @proxy_owner:the task to set as owner
1099  *
1100  * No locking. Caller has to do serializing itself
1101  * Special API call for PI-futex support
1102  */
1103 void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
1104                                 struct task_struct *proxy_owner)
1105 {
1106         __rt_mutex_init(lock, NULL);
1107         debug_rt_mutex_proxy_lock(lock, proxy_owner);
1108         rt_mutex_set_owner(lock, proxy_owner);
1109         rt_mutex_deadlock_account_lock(lock, proxy_owner);
1110 }
1111
1112 /**
1113  * rt_mutex_proxy_unlock - release a lock on behalf of owner
1114  *
1115  * @lock:       the rt_mutex to be locked
1116  *
1117  * No locking. Caller has to do serializing itself
1118  * Special API call for PI-futex support
1119  */
1120 void rt_mutex_proxy_unlock(struct rt_mutex *lock,
1121                            struct task_struct *proxy_owner)
1122 {
1123         debug_rt_mutex_proxy_unlock(lock);
1124         rt_mutex_set_owner(lock, NULL);
1125         rt_mutex_deadlock_account_unlock(proxy_owner);
1126 }
1127
1128 /**
1129  * rt_mutex_start_proxy_lock() - Start lock acquisition for another task
1130  * @lock:               the rt_mutex to take
1131  * @waiter:             the pre-initialized rt_mutex_waiter
1132  * @task:               the task to prepare
1133  * @detect_deadlock:    perform deadlock detection (1) or not (0)
1134  *
1135  * Returns:
1136  *  0 - task blocked on lock
1137  *  1 - acquired the lock for task, caller should wake it up
1138  * <0 - error
1139  *
1140  * Special API call for FUTEX_REQUEUE_PI support.
1141  */
1142 int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
1143                               struct rt_mutex_waiter *waiter,
1144                               struct task_struct *task, int detect_deadlock)
1145 {
1146         int ret;
1147
1148         raw_spin_lock(&lock->wait_lock);
1149
1150         if (try_to_take_rt_mutex(lock, task, NULL)) {
1151                 raw_spin_unlock(&lock->wait_lock);
1152                 return 1;
1153         }
1154
1155         /* We enforce deadlock detection for futexes */
1156         ret = task_blocks_on_rt_mutex(lock, waiter, task, 1);
1157
1158         if (ret && !rt_mutex_owner(lock)) {
1159                 /*
1160                  * Reset the return value. We might have
1161                  * returned with -EDEADLK and the owner
1162                  * released the lock while we were walking the
1163                  * pi chain.  Let the waiter sort it out.
1164                  */
1165                 ret = 0;
1166         }
1167
1168         if (unlikely(ret))
1169                 remove_waiter(lock, waiter);
1170
1171         raw_spin_unlock(&lock->wait_lock);
1172
1173         debug_rt_mutex_print_deadlock(waiter);
1174
1175         return ret;
1176 }
1177
1178 /**
1179  * rt_mutex_next_owner - return the next owner of the lock
1180  *
1181  * @lock: the rt lock query
1182  *
1183  * Returns the next owner of the lock or NULL
1184  *
1185  * Caller has to serialize against other accessors to the lock
1186  * itself.
1187  *
1188  * Special API call for PI-futex support
1189  */
1190 struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock)
1191 {
1192         if (!rt_mutex_has_waiters(lock))
1193                 return NULL;
1194
1195         return rt_mutex_top_waiter(lock)->task;
1196 }
1197
1198 /**
1199  * rt_mutex_finish_proxy_lock() - Complete lock acquisition
1200  * @lock:               the rt_mutex we were woken on
1201  * @to:                 the timeout, null if none. hrtimer should already have
1202  *                      been started.
1203  * @waiter:             the pre-initialized rt_mutex_waiter
1204  * @detect_deadlock:    perform deadlock detection (1) or not (0)
1205  *
1206  * Complete the lock acquisition started our behalf by another thread.
1207  *
1208  * Returns:
1209  *  0 - success
1210  * <0 - error, one of -EINTR, -ETIMEDOUT, or -EDEADLK
1211  *
1212  * Special API call for PI-futex requeue support
1213  */
1214 int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
1215                                struct hrtimer_sleeper *to,
1216                                struct rt_mutex_waiter *waiter,
1217                                int detect_deadlock)
1218 {
1219         int ret;
1220
1221         raw_spin_lock(&lock->wait_lock);
1222
1223         set_current_state(TASK_INTERRUPTIBLE);
1224
1225         ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
1226
1227         set_current_state(TASK_RUNNING);
1228
1229         if (unlikely(ret))
1230                 remove_waiter(lock, waiter);
1231
1232         /*
1233          * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
1234          * have to fix that up.
1235          */
1236         fixup_rt_mutex_waiters(lock);
1237
1238         raw_spin_unlock(&lock->wait_lock);
1239
1240         return ret;
1241 }