SCHED_TTWU_QUEUE is no longer needed since sparc32 now implements IPIs
diff --git a/kernel/sched.c b/kernel/sched.c
index d398f2f..c4b3410 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -312,6 +312,9 @@ struct cfs_rq {
 
        u64 exec_clock;
        u64 min_vruntime;
+#ifndef CONFIG_64BIT
+       u64 min_vruntime_copy;
+#endif
 
        struct rb_root tasks_timeline;
        struct rb_node *rb_leftmost;
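
The new min_vruntime_copy field is compiled only on 32-bit kernels, where a 64-bit
store can tear and a lockless reader could observe half an update. The companion
fair-class code (not part of this hunk) is expected to publish the value through
both fields with a write barrier in between, and to re-read until both copies
agree. A minimal userspace sketch of that pattern, with made-up names and C11
fences standing in for smp_wmb()/smp_rmb():

	#include <stdatomic.h>
	#include <stdint.h>

	struct vrq {
		uint64_t min_vruntime;		/* may tear on a 32-bit store */
		uint64_t min_vruntime_copy;	/* written after the barrier */
	};

	/* writer side, assumed to run under rq->lock */
	static void publish_min_vruntime(struct vrq *q, uint64_t v)
	{
		q->min_vruntime = v;
		atomic_thread_fence(memory_order_release);	/* ~smp_wmb() */
		q->min_vruntime_copy = v;
	}

	/* lockless reader: retry until it sees a consistent pair */
	static uint64_t read_min_vruntime(const struct vrq *q)
	{
		uint64_t v, copy;

		do {
			copy = q->min_vruntime_copy;
			atomic_thread_fence(memory_order_acquire);	/* ~smp_rmb() */
			v = q->min_vruntime;
		} while (v != copy);

		return v;
	}
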
@@ -553,6 +556,10 @@ struct rq {
        unsigned int ttwu_count;
        unsigned int ttwu_local;
 #endif
+
+#ifdef CONFIG_SMP
+       struct task_struct *wake_list;
+#endif
 };
 
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
@@ -596,7 +603,7 @@ static inline int cpu_of(struct rq *rq)
  * Return the group to which this tasks belongs.
  *
  * We use task_subsys_state_check() and extend the RCU verification
- * with lockdep_is_held(&task_rq(p)->lock) because cpu_cgroup_attach()
+ * with lockdep_is_held(&p->pi_lock) because cpu_cgroup_attach()
  * holds that lock for each task it moves into the cgroup. Therefore
  * by holding that lock, we pin the task to the current cgroup.
  */
@@ -606,7 +613,7 @@ static inline struct task_group *task_group(struct task_struct *p)
        struct cgroup_subsys_state *css;
 
        css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
-                       lockdep_is_held(&task_rq(p)->lock));
+                       lockdep_is_held(&p->pi_lock));
        tg = container_of(css, struct task_group, css);
 
        return autogroup_task_group(p, tg);
@@ -921,23 +928,15 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
 #endif /* __ARCH_WANT_UNLOCKED_CTXSW */
 
 /*
- * Check whether the task is waking, we use this to synchronize ->cpus_allowed
- * against ttwu().
- */
-static inline int task_is_waking(struct task_struct *p)
-{
-       return unlikely(p->state == TASK_WAKING);
-}
-
-/*
- * __task_rq_lock - lock the runqueue a given task resides on.
- * Must be called interrupts disabled.
+ * __task_rq_lock - lock the rq @p resides on.
  */
 static inline struct rq *__task_rq_lock(struct task_struct *p)
        __acquires(rq->lock)
 {
        struct rq *rq;
 
+       lockdep_assert_held(&p->pi_lock);
+
        for (;;) {
                rq = task_rq(p);
                raw_spin_lock(&rq->lock);
@@ -948,22 +947,22 @@ static inline struct rq *__task_rq_lock(struct task_struct *p)
 }
 
 /*
- * task_rq_lock - lock the runqueue a given task resides on and disable
- * interrupts. Note the ordering: we can safely lookup the task_rq without
- * explicitly disabling preemption.
+ * task_rq_lock - lock p->pi_lock and lock the rq @p resides on.
  */
 static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
+       __acquires(p->pi_lock)
        __acquires(rq->lock)
 {
        struct rq *rq;
 
        for (;;) {
-               local_irq_save(*flags);
+               raw_spin_lock_irqsave(&p->pi_lock, *flags);
                rq = task_rq(p);
                raw_spin_lock(&rq->lock);
                if (likely(rq == task_rq(p)))
                        return rq;
-               raw_spin_unlock_irqrestore(&rq->lock, *flags);
+               raw_spin_unlock(&rq->lock);
+               raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
        }
 }
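
task_rq_lock() now pins the task through both p->pi_lock and rq->lock, and, as the
next hunk shows, task_rq_unlock() takes @p so it can drop p->pi_lock again; that is
why every caller further down in this patch gains the extra argument. A hypothetical
caller (illustrative only, not part of the patch) would look like:

	static void frob_task(struct task_struct *p)
	{
		unsigned long flags;
		struct rq *rq;

		rq = task_rq_lock(p, &flags);	/* p->pi_lock, then rq->lock */
		/* p can neither migrate nor be concurrently woken up here */
		task_rq_unlock(rq, p, &flags);	/* rq->lock, then p->pi_lock */
	}
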
 
@@ -973,10 +972,13 @@ static void __task_rq_unlock(struct rq *rq)
        raw_spin_unlock(&rq->lock);
 }
 
-static inline void task_rq_unlock(struct rq *rq, unsigned long *flags)
+static inline void
+task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
        __releases(rq->lock)
+       __releases(p->pi_lock)
 {
-       raw_spin_unlock_irqrestore(&rq->lock, *flags);
+       raw_spin_unlock(&rq->lock);
+       raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
 }
 
 /*
@@ -2172,6 +2174,11 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
         */
        WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING &&
                        !(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE));
+
+#ifdef CONFIG_LOCKDEP
+       WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) ||
+                                     lockdep_is_held(&task_rq(p)->lock)));
+#endif
 #endif
 
        trace_sched_migrate_task(p, new_cpu);
@@ -2195,13 +2202,15 @@ static int migration_cpu_stop(void *data);
  * The task's runqueue lock must be held.
  * Returns true if you have to wait for migration thread.
  */
-static bool migrate_task(struct task_struct *p, struct rq *rq)
+static bool need_migrate_task(struct task_struct *p)
 {
        /*
         * If the task is not on a runqueue (and not running), then
         * the next wake-up will properly place the task.
         */
-       return p->on_rq || task_running(rq, p);
+       bool running = p->on_rq || p->on_cpu;
+       smp_rmb(); /* finish_lock_switch() */
+       return running;
 }
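
Per the comment, the bare smp_rmb() pairs with the smp_wmb() that finish_lock_switch()
issues before clearing ->on_cpu: once a waker observes ->on_cpu == 0, it is also
guaranteed to see everything the previous CPU wrote while switching the task out.
An illustrative userspace sketch of that pairing, using C11 atomics in place of the
kernel primitives (names are made up):

	#include <stdatomic.h>
	#include <stdbool.h>

	struct mini_task {
		int switched_state;	/* stands in for state written by schedule() */
		atomic_bool on_cpu;
	};

	/* previous CPU: finish the switch, then publish "not running" */
	static void finish_switch(struct mini_task *prev)
	{
		prev->switched_state = 1;
		atomic_store_explicit(&prev->on_cpu, false,
				      memory_order_release);	/* ~smp_wmb() + store */
	}

	/* waker: an acquire load plays the role of the load + smp_rmb() */
	static int observe(struct mini_task *p)
	{
		if (!atomic_load_explicit(&p->on_cpu, memory_order_acquire))
			return p->switched_state;	/* guaranteed to read 1 */
		return -1;				/* still running somewhere */
	}
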
 
 /*
@@ -2265,7 +2274,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
                ncsw = 0;
                if (!match_state || p->state == match_state)
                        ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
-               task_rq_unlock(rq, &flags);
+               task_rq_unlock(rq, p, &flags);
 
                /*
                 * If it changed from the expected state, bail out now.
@@ -2376,9 +2385,9 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
  * The caller (fork, wakeup) owns p->pi_lock, ->cpus_allowed is stable.
  */
 static inline
-int select_task_rq(struct rq *rq, struct task_struct *p, int sd_flags, int wake_flags)
+int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
 {
-       int cpu = p->sched_class->select_task_rq(rq, p, sd_flags, wake_flags);
+       int cpu = p->sched_class->select_task_rq(p, sd_flags, wake_flags);
 
        /*
         * In order not to call set_task_cpu() on a blocking task we need
@@ -2405,9 +2414,11 @@ static void update_avg(u64 *avg, u64 sample)
 #endif
 
 static void
-ttwu_stat(struct rq *rq, struct task_struct *p, int cpu, int wake_flags)
+ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
 {
 #ifdef CONFIG_SCHEDSTATS
+       struct rq *rq = this_rq();
+
 #ifdef CONFIG_SMP
        int this_cpu = smp_processor_id();
 
@@ -2449,8 +2460,11 @@ static void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags)
                wq_worker_waking_up(p, cpu_of(rq));
 }
 
+/*
+ * Mark the task runnable and perform wakeup-preemption.
+ */
 static void
-ttwu_post_activation(struct task_struct *p, struct rq *rq, int wake_flags)
+ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
 {
        trace_sched_wakeup(p, true);
        check_preempt_curr(rq, p, wake_flags);
@@ -2473,6 +2487,99 @@ ttwu_post_activation(struct task_struct *p, struct rq *rq, int wake_flags)
 #endif
 }
 
+static void
+ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags)
+{
+#ifdef CONFIG_SMP
+       if (p->sched_contributes_to_load)
+               rq->nr_uninterruptible--;
+#endif
+
+       ttwu_activate(rq, p, ENQUEUE_WAKEUP | ENQUEUE_WAKING);
+       ttwu_do_wakeup(rq, p, wake_flags);
+}
+
+/*
+ * Called when the task @p isn't fully descheduled from its runqueue;
+ * in this case we must do a remote wakeup. It's a 'light' wakeup though,
+ * since all we need to do is flip p->state to TASK_RUNNING: the task
+ * is still ->on_rq.
+ */
+static int ttwu_remote(struct task_struct *p, int wake_flags)
+{
+       struct rq *rq;
+       int ret = 0;
+
+       rq = __task_rq_lock(p);
+       if (p->on_rq) {
+               ttwu_do_wakeup(rq, p, wake_flags);
+               ret = 1;
+       }
+       __task_rq_unlock(rq);
+
+       return ret;
+}
+
+#ifdef CONFIG_SMP
+static void sched_ttwu_pending(void)
+{
+       struct rq *rq = this_rq();
+       struct task_struct *list = xchg(&rq->wake_list, NULL);
+
+       if (!list)
+               return;
+
+       raw_spin_lock(&rq->lock);
+
+       while (list) {
+               struct task_struct *p = list;
+               list = list->wake_entry;
+               ttwu_do_activate(rq, p, 0);
+       }
+
+       raw_spin_unlock(&rq->lock);
+}
+
+void scheduler_ipi(void)
+{
+       sched_ttwu_pending();
+}
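
scheduler_ipi() is the hook that each architecture's reschedule-IPI handler is now
expected to invoke so that wakeups queued on rq->wake_list get drained on the target
CPU; this is also why the title above notes that sparc32 needed working IPIs. A
sketch of what an arch-side handler is assumed to look like (handler and ack names
are illustrative, not from this patch):

	/* arch/<arch>/kernel/smp.c, hypothetical */
	extern void scheduler_ipi(void);	/* provided by kernel/sched.c above */

	static void arch_ack_ipi(void)
	{
		/* platform-specific acknowledgement, assumed */
	}

	void handle_reschedule_ipi(void)
	{
		arch_ack_ipi();
		scheduler_ipi();	/* drains rq->wake_list via sched_ttwu_pending() */
	}
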
+
+static void ttwu_queue_remote(struct task_struct *p, int cpu)
+{
+       struct rq *rq = cpu_rq(cpu);
+       struct task_struct *next = rq->wake_list;
+
+       for (;;) {
+               struct task_struct *old = next;
+
+               p->wake_entry = next;
+               next = cmpxchg(&rq->wake_list, old, p);
+               if (next == old)
+                       break;
+       }
+
+       if (!next)
+               smp_send_reschedule(cpu);
+}
+#endif
+
+static void ttwu_queue(struct task_struct *p, int cpu)
+{
+       struct rq *rq = cpu_rq(cpu);
+
+#if defined(CONFIG_SMP)
+       if (sched_feat(TTWU_QUEUE) && cpu != smp_processor_id()) {
+               ttwu_queue_remote(p, cpu);
+               return;
+       }
+#endif
+
+       raw_spin_lock(&rq->lock);
+       ttwu_do_activate(rq, p, 0);
+       raw_spin_unlock(&rq->lock);
+}
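
rq->wake_list is effectively a lock-free LIFO: ttwu_queue_remote() pushes with a
cmpxchg() loop and only sends the IPI for the first entry (a non-empty list means an
IPI is already on its way), while sched_ttwu_pending() detaches the whole list in a
single xchg(). A self-contained model of that scheme in C11 atomics (types and names
are illustrative):

	#include <stdatomic.h>
	#include <stddef.h>

	struct wake_node {
		struct wake_node *next;
	};

	struct wake_queue {
		_Atomic(struct wake_node *) head;
	};

	/* producer side: returns nonzero if the caller should send the IPI */
	static int wake_push(struct wake_queue *q, struct wake_node *n)
	{
		struct wake_node *old = atomic_load(&q->head);

		do {
			n->next = old;
		} while (!atomic_compare_exchange_weak(&q->head, &old, n));

		return old == NULL;	/* first entry: kick the remote CPU */
	}

	/* consumer side (IPI handler): take everything in one go */
	static struct wake_node *wake_drain(struct wake_queue *q)
	{
		return atomic_exchange(&q->head, NULL);	/* ~xchg() */
	}
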
+
 /**
  * try_to_wake_up - wake up a thread
  * @p: the thread to be awakened
@@ -2488,79 +2595,64 @@ ttwu_post_activation(struct task_struct *p, struct rq *rq, int wake_flags)
  * Returns %true if @p was woken up, %false if it was already running
  * or @state didn't match @p's state.
  */
-static int try_to_wake_up(struct task_struct *p, unsigned int state,
-                         int wake_flags)
+static int
+try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 {
-       int cpu, orig_cpu, this_cpu, success = 0;
        unsigned long flags;
-       unsigned long en_flags = ENQUEUE_WAKEUP;
-       struct rq *rq;
-
-       this_cpu = get_cpu();
+       int cpu, success = 0;
 
        smp_wmb();
        raw_spin_lock_irqsave(&p->pi_lock, flags);
-       rq = __task_rq_lock(p);
        if (!(p->state & state))
                goto out;
 
+       success = 1; /* we're going to change ->state */
        cpu = task_cpu(p);
 
-       if (p->on_rq)
-               goto out_running;
+       if (p->on_rq && ttwu_remote(p, wake_flags))
+               goto stat;
 
-       orig_cpu = cpu;
 #ifdef CONFIG_SMP
-       if (unlikely(task_running(rq, p)))
-               goto out_activate;
-
        /*
-        * In order to handle concurrent wakeups and release the rq->lock
-        * we put the task in TASK_WAKING state.
-        *
-        * First fix up the nr_uninterruptible count:
+        * If the owning (remote) cpu is still in the middle of schedule() with
+        * this task as prev, wait until it's done referencing the task.
         */
-       if (task_contributes_to_load(p)) {
-               if (likely(cpu_online(orig_cpu)))
-                       rq->nr_uninterruptible--;
-               else
-                       this_rq()->nr_uninterruptible--;
+       while (p->on_cpu) {
+#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+               /*
+                * If called from interrupt context we could have landed in the
+                * middle of schedule(); in this case we should take care not
+                * to spin on ->on_cpu if p is current, since that would
+                * deadlock.
+                */
+               if (p == current) {
+                       ttwu_queue(p, cpu);
+                       goto stat;
+               }
+#endif
+               cpu_relax();
        }
+       /*
+        * Pairs with the smp_wmb() in finish_lock_switch().
+        */
+       smp_rmb();
+
+       p->sched_contributes_to_load = !!task_contributes_to_load(p);
        p->state = TASK_WAKING;
 
-       if (p->sched_class->task_waking) {
-               p->sched_class->task_waking(rq, p);
-               en_flags |= ENQUEUE_WAKING;
-       }
+       if (p->sched_class->task_waking)
+               p->sched_class->task_waking(p);
 
-       cpu = select_task_rq(rq, p, SD_BALANCE_WAKE, wake_flags);
-       if (cpu != orig_cpu)
+       cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
+       if (task_cpu(p) != cpu)
                set_task_cpu(p, cpu);
-       __task_rq_unlock(rq);
-
-       rq = cpu_rq(cpu);
-       raw_spin_lock(&rq->lock);
-
-       /*
-        * We migrated the task without holding either rq->lock, however
-        * since the task is not on the task list itself, nobody else
-        * will try and migrate the task, hence the rq should match the
-        * cpu we just moved it to.
-        */
-       WARN_ON(task_cpu(p) != cpu);
-       WARN_ON(p->state != TASK_WAKING);
-
-out_activate:
 #endif /* CONFIG_SMP */
-       ttwu_activate(rq, p, en_flags);
-out_running:
-       ttwu_post_activation(p, rq, wake_flags);
-       ttwu_stat(rq, p, cpu, wake_flags);
-       success = 1;
+
+       ttwu_queue(p, cpu);
+stat:
+       ttwu_stat(p, cpu, wake_flags);
 out:
-       __task_rq_unlock(rq);
        raw_spin_unlock_irqrestore(&p->pi_lock, flags);
-       put_cpu();
 
        return success;
 }
@@ -2569,9 +2661,9 @@ out:
  * try_to_wake_up_local - try to wake up a local task with rq lock held
  * @p: the thread to be awakened
  *
- * Put @p on the run-queue if it's not already there.  The caller must
+ * Put @p on the run-queue if it's not already there. The caller must
  * ensure that this_rq() is locked, @p is bound to this_rq() and not
- * the current task.  this_rq() stays locked over invocation.
+ * the current task.
  */
 static void try_to_wake_up_local(struct task_struct *p)
 {
@@ -2581,14 +2673,22 @@ static void try_to_wake_up_local(struct task_struct *p)
        BUG_ON(p == current);
        lockdep_assert_held(&rq->lock);
 
+       if (!raw_spin_trylock(&p->pi_lock)) {
+               raw_spin_unlock(&rq->lock);
+               raw_spin_lock(&p->pi_lock);
+               raw_spin_lock(&rq->lock);
+       }
+
        if (!(p->state & TASK_NORMAL))
-               return;
+               goto out;
 
        if (!p->on_rq)
                ttwu_activate(rq, p, ENQUEUE_WAKEUP);
 
-       ttwu_post_activation(p, rq, 0);
-       ttwu_stat(rq, p, smp_processor_id(), 0);
+       ttwu_do_wakeup(rq, p, 0);
+       ttwu_stat(p, smp_processor_id(), 0);
+out:
+       raw_spin_unlock(&p->pi_lock);
 }
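
The trylock dance at the top preserves the new lock order (p->pi_lock before
rq->lock) even though this function is entered with rq->lock already held: if the
trylock fails, rq->lock is dropped briefly and both locks are retaken in the correct
order, and the p->state check afterwards copes with whatever happened in that window.
The same ABBA-avoidance idiom in plain pthreads (illustrative only):

	#include <pthread.h>

	static pthread_mutex_t pi_lock = PTHREAD_MUTEX_INITIALIZER;
	static pthread_mutex_t rq_lock = PTHREAD_MUTEX_INITIALIZER;

	/* caller already holds rq_lock; acquire pi_lock without risking ABBA */
	static void take_pi_lock_while_holding_rq(void)
	{
		if (pthread_mutex_trylock(&pi_lock) != 0) {
			pthread_mutex_unlock(&rq_lock);	/* back off */
			pthread_mutex_lock(&pi_lock);	/* correct order: pi first */
			pthread_mutex_lock(&rq_lock);
		}
		/* both locks held; revalidate anything that may have changed
		 * while rq_lock was dropped */
	}
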
 
 /**
@@ -2647,6 +2747,7 @@ static void __sched_fork(struct task_struct *p)
  */
 void sched_fork(struct task_struct *p, int clone_flags)
 {
+       unsigned long flags;
        int cpu = get_cpu();
 
        __sched_fork(p);
@@ -2697,9 +2798,9 @@ void sched_fork(struct task_struct *p, int clone_flags)
         *
         * Silence PROVE_RCU.
         */
-       rcu_read_lock();
+       raw_spin_lock_irqsave(&p->pi_lock, flags);
        set_task_cpu(p, cpu);
-       rcu_read_unlock();
+       raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 
 #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
        if (likely(sched_info_on()))
@@ -2730,28 +2831,18 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 {
        unsigned long flags;
        struct rq *rq;
-       int cpu __maybe_unused = get_cpu();
 
+       raw_spin_lock_irqsave(&p->pi_lock, flags);
 #ifdef CONFIG_SMP
-       rq = task_rq_lock(p, &flags);
-       p->state = TASK_WAKING;
-
        /*
         * Fork balancing, do it here and not earlier because:
         *  - cpus_allowed can change in the fork path
         *  - any previously selected cpu might disappear through hotplug
-        *
-        * We set TASK_WAKING so that select_task_rq() can drop rq->lock
-        * without people poking at ->cpus_allowed.
         */
-       cpu = select_task_rq(rq, p, SD_BALANCE_FORK, 0);
-       set_task_cpu(p, cpu);
-
-       p->state = TASK_RUNNING;
-       task_rq_unlock(rq, &flags);
+       set_task_cpu(p, select_task_rq(p, SD_BALANCE_FORK, 0));
 #endif
 
-       rq = task_rq_lock(p, &flags);
+       rq = __task_rq_lock(p);
        activate_task(rq, p, 0);
        p->on_rq = 1;
        trace_sched_wakeup_new(p, true);
@@ -2760,8 +2851,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
        if (p->sched_class->task_woken)
                p->sched_class->task_woken(rq, p);
 #endif
-       task_rq_unlock(rq, &flags);
-       put_cpu();
+       task_rq_unlock(rq, p, &flags);
 }
 
 #ifdef CONFIG_PREEMPT_NOTIFIERS
@@ -3470,27 +3560,22 @@ void sched_exec(void)
 {
        struct task_struct *p = current;
        unsigned long flags;
-       struct rq *rq;
        int dest_cpu;
 
-       rq = task_rq_lock(p, &flags);
-       dest_cpu = p->sched_class->select_task_rq(rq, p, SD_BALANCE_EXEC, 0);
+       raw_spin_lock_irqsave(&p->pi_lock, flags);
+       dest_cpu = p->sched_class->select_task_rq(p, SD_BALANCE_EXEC, 0);
        if (dest_cpu == smp_processor_id())
                goto unlock;
 
-       /*
-        * select_task_rq() can race against ->cpus_allowed
-        */
-       if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed) &&
-           likely(cpu_active(dest_cpu)) && migrate_task(p, rq)) {
+       if (likely(cpu_active(dest_cpu))) {
                struct migration_arg arg = { p, dest_cpu };
 
-               task_rq_unlock(rq, &flags);
-               stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
+               raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+               stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg);
                return;
        }
 unlock:
-       task_rq_unlock(rq, &flags);
+       raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 }
 
 #endif
@@ -3527,7 +3612,7 @@ unsigned long long task_delta_exec(struct task_struct *p)
 
        rq = task_rq_lock(p, &flags);
        ns = do_task_delta_exec(p, rq);
-       task_rq_unlock(rq, &flags);
+       task_rq_unlock(rq, p, &flags);
 
        return ns;
 }
@@ -3545,7 +3630,7 @@ unsigned long long task_sched_runtime(struct task_struct *p)
 
        rq = task_rq_lock(p, &flags);
        ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq);
-       task_rq_unlock(rq, &flags);
+       task_rq_unlock(rq, p, &flags);
 
        return ns;
 }
@@ -3569,7 +3654,7 @@ unsigned long long thread_group_sched_runtime(struct task_struct *p)
        rq = task_rq_lock(p, &flags);
        thread_group_cputime(p, &totals);
        ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq);
-       task_rq_unlock(rq, &flags);
+       task_rq_unlock(rq, p, &flags);
 
        return ns;
 }
@@ -4117,11 +4202,13 @@ need_resched:
                if (unlikely(signal_pending_state(prev->state, prev))) {
                        prev->state = TASK_RUNNING;
                } else {
+                       deactivate_task(rq, prev, DEQUEUE_SLEEP);
+                       prev->on_rq = 0;
+
                        /*
-                        * If a worker is going to sleep, notify and
-                        * ask workqueue whether it wants to wake up a
-                        * task to maintain concurrency.  If so, wake
-                        * up the task.
+                        * If a worker went to sleep, notify and ask workqueue
+                        * whether it wants to wake up a task to maintain
+                        * concurrency.
                         */
                        if (prev->flags & PF_WQ_WORKER) {
                                struct task_struct *to_wakeup;
@@ -4131,16 +4218,13 @@ need_resched:
                                        try_to_wake_up_local(to_wakeup);
                        }
 
-                       deactivate_task(rq, prev, DEQUEUE_SLEEP);
-                       prev->on_rq = 0;
-
                        /*
-                        * If we are going to sleep and we have plugged IO queued, make
-                        * sure to submit it to avoid deadlocks.
+                        * If we are going to sleep and we have plugged IO
+                        * queued, make sure to submit it to avoid deadlocks.
                         */
                        if (blk_needs_flush_plug(prev)) {
                                raw_spin_unlock(&rq->lock);
-                               blk_flush_plug(prev);
+                               blk_schedule_flush_plug(prev);
                                raw_spin_lock(&rq->lock);
                        }
                }
@@ -4689,16 +4773,13 @@ EXPORT_SYMBOL(sleep_on_timeout);
  */
 void rt_mutex_setprio(struct task_struct *p, int prio)
 {
-       unsigned long flags;
        int oldprio, on_rq, running;
        struct rq *rq;
        const struct sched_class *prev_class;
 
        BUG_ON(prio < 0 || prio > MAX_PRIO);
 
-       lockdep_assert_held(&p->pi_lock);
-
-       rq = task_rq_lock(p, &flags);
+       rq = __task_rq_lock(p);
 
        trace_sched_pi_setprio(p, prio);
        oldprio = p->prio;
@@ -4723,7 +4804,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
                enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0);
 
        check_class_changed(rq, p, prev_class, oldprio);
-       task_rq_unlock(rq, &flags);
+       __task_rq_unlock(rq);
 }
 
 #endif
@@ -4771,7 +4852,7 @@ void set_user_nice(struct task_struct *p, long nice)
                        resched_task(rq->curr);
        }
 out_unlock:
-       task_rq_unlock(rq, &flags);
+       task_rq_unlock(rq, p, &flags);
 }
 EXPORT_SYMBOL(set_user_nice);
 
@@ -4999,20 +5080,17 @@ recheck:
        /*
         * make sure no PI-waiters arrive (or leave) while we are
         * changing the priority of the task:
-        */
-       raw_spin_lock_irqsave(&p->pi_lock, flags);
-       /*
+        *
         * To be able to change p->policy safely, the appropriate
         * runqueue lock must be held.
         */
-       rq = __task_rq_lock(p);
+       rq = task_rq_lock(p, &flags);
 
        /*
         * Changing the policy of the stop threads its a very bad idea
         */
        if (p == rq->stop) {
-               __task_rq_unlock(rq);
-               raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+               task_rq_unlock(rq, p, &flags);
                return -EINVAL;
        }
 
@@ -5036,8 +5114,7 @@ recheck:
                if (rt_bandwidth_enabled() && rt_policy(policy) &&
                                task_group(p)->rt_bandwidth.rt_runtime == 0 &&
                                !task_group_is_autogroup(task_group(p))) {
-                       __task_rq_unlock(rq);
-                       raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+                       task_rq_unlock(rq, p, &flags);
                        return -EPERM;
                }
        }
@@ -5046,8 +5123,7 @@ recheck:
        /* recheck policy now with rq lock held */
        if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
                policy = oldpolicy = -1;
-               __task_rq_unlock(rq);
-               raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+               task_rq_unlock(rq, p, &flags);
                goto recheck;
        }
        on_rq = p->on_rq;
@@ -5069,8 +5145,7 @@ recheck:
                activate_task(rq, p, 0);
 
        check_class_changed(rq, p, prev_class, oldprio);
-       __task_rq_unlock(rq);
-       raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+       task_rq_unlock(rq, p, &flags);
 
        rt_mutex_adjust_pi(p);
 
@@ -5662,7 +5737,7 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
 
        rq = task_rq_lock(p, &flags);
        time_slice = p->sched_class->get_rr_interval(rq, p);
-       task_rq_unlock(rq, &flags);
+       task_rq_unlock(rq, p, &flags);
 
        rcu_read_unlock();
        jiffies_to_timespec(time_slice, &t);
@@ -5885,8 +5960,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
        unsigned int dest_cpu;
        int ret = 0;
 
-       raw_spin_lock_irqsave(&p->pi_lock, flags);
-       rq = __task_rq_lock(p);
+       rq = task_rq_lock(p, &flags);
 
        if (!cpumask_intersects(new_mask, cpu_active_mask)) {
                ret = -EINVAL;
@@ -5911,18 +5985,16 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
                goto out;
 
        dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
-       if (migrate_task(p, rq)) {
+       if (need_migrate_task(p)) {
                struct migration_arg arg = { p, dest_cpu };
                /* Need help from migration thread: drop lock and wait. */
-               __task_rq_unlock(rq);
-               raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+               task_rq_unlock(rq, p, &flags);
                stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
                tlb_migrate_finish(p->mm);
                return 0;
        }
 out:
-       __task_rq_unlock(rq);
-       raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+       task_rq_unlock(rq, p, &flags);
 
        return ret;
 }
@@ -5950,6 +6022,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
        rq_src = cpu_rq(src_cpu);
        rq_dest = cpu_rq(dest_cpu);
 
+       raw_spin_lock(&p->pi_lock);
        double_rq_lock(rq_src, rq_dest);
        /* Already moved. */
        if (task_cpu(p) != src_cpu)
@@ -5972,6 +6045,7 @@ done:
        ret = 1;
 fail:
        double_rq_unlock(rq_src, rq_dest);
+       raw_spin_unlock(&p->pi_lock);
        return ret;
 }
 
@@ -6312,6 +6386,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 
 #ifdef CONFIG_HOTPLUG_CPU
        case CPU_DYING:
+               sched_ttwu_pending();
                /* Update our root-domain */
                raw_spin_lock_irqsave(&rq->lock, flags);
                if (rq->rd) {
@@ -8698,7 +8773,7 @@ void sched_move_task(struct task_struct *tsk)
        if (on_rq)
                enqueue_task(rq, tsk, 0);
 
-       task_rq_unlock(rq, &flags);
+       task_rq_unlock(rq, tsk, &flags);
 }
 #endif /* CONFIG_CGROUP_SCHED */