cap_syslog: don't use WARN_ONCE for CAP_SYS_ADMIN deprecation warning
[linux-2.6.git] / kernel / sched_fair.c
index e32a9b7..bc8ee99 100644 (file)
@@ -135,14 +135,6 @@ static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
        return grp->my_q;
 }
 
-/* Given a group's cfs_rq on one cpu, return its corresponding cfs_rq on
- * another cpu ('this_cpu')
- */
-static inline struct cfs_rq *cpu_cfs_rq(struct cfs_rq *cfs_rq, int this_cpu)
-{
-       return cfs_rq->tg->cfs_rq[this_cpu];
-}
-
 static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
 {
        if (!cfs_rq->on_list) {
@@ -271,11 +263,6 @@ static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
        return NULL;
 }
 
-static inline struct cfs_rq *cpu_cfs_rq(struct cfs_rq *cfs_rq, int this_cpu)
-{
-       return &cpu_rq(this_cpu)->cfs;
-}
-
 static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
 {
 }
@@ -334,11 +321,6 @@ static inline int entity_before(struct sched_entity *a,
        return (s64)(a->vruntime - b->vruntime) < 0;
 }
 
-static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
-       return se->vruntime - cfs_rq->min_vruntime;
-}
-
 static void update_min_vruntime(struct cfs_rq *cfs_rq)
 {
        u64 vruntime = cfs_rq->min_vruntime;
@@ -372,7 +354,6 @@ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
        struct rb_node **link = &cfs_rq->tasks_timeline.rb_node;
        struct rb_node *parent = NULL;
        struct sched_entity *entry;
-       s64 key = entity_key(cfs_rq, se);
        int leftmost = 1;
 
        /*
@@ -385,7 +366,7 @@ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
                 * We dont care about collisions. Nodes with
                 * the same key stay together.
                 */
-               if (key < entity_key(cfs_rq, entry)) {
+               if (entity_before(se, entry)) {
                        link = &parent->rb_left;
                } else {
                        link = &parent->rb_right;
@@ -1076,8 +1057,6 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
        se->on_rq = 0;
        update_cfs_load(cfs_rq, 0);
        account_entity_dequeue(cfs_rq, se);
-       update_min_vruntime(cfs_rq);
-       update_cfs_shares(cfs_rq);
 
        /*
         * Normalize the entity after updating the min_vruntime because the
@@ -1086,6 +1065,9 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
         */
        if (!(flags & DEQUEUE_SLEEP))
                se->vruntime -= cfs_rq->min_vruntime;
+
+       update_min_vruntime(cfs_rq);
+       update_cfs_shares(cfs_rq);
 }
 
 /*
@@ -1335,7 +1317,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
        }
 
        for_each_sched_entity(se) {
-               struct cfs_rq *cfs_rq = cfs_rq_of(se);
+               cfs_rq = cfs_rq_of(se);
 
                update_cfs_load(cfs_rq, 0);
                update_cfs_shares(cfs_rq);
@@ -1369,13 +1351,16 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
                         */
                        if (task_sleep && parent_entity(se))
                                set_next_buddy(parent_entity(se));
+
+                       /* avoid re-evaluating load for this entity */
+                       se = parent_entity(se);
                        break;
                }
                flags |= DEQUEUE_SLEEP;
        }
 
        for_each_sched_entity(se) {
-               struct cfs_rq *cfs_rq = cfs_rq_of(se);
+               cfs_rq = cfs_rq_of(se);
 
                update_cfs_load(cfs_rq, 0);
                update_cfs_shares(cfs_rq);
@@ -1480,7 +1465,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
         * effect of the currently running task from the load
         * of the current CPU:
         */
-       rcu_read_lock();
        if (sync) {
                tg = task_group(current);
                weight = current->se.load.weight;
@@ -1516,7 +1500,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
                balanced = this_eff_load <= prev_eff_load;
        } else
                balanced = true;
-       rcu_read_unlock();
 
        /*
         * If the currently running task will sleep within
@@ -1584,7 +1567,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
                }
 
                /* Adjust by relative CPU power of the group */
-               avg_load = (avg_load * SCHED_POWER_SCALE) / group->cpu_power;
+               avg_load = (avg_load * SCHED_POWER_SCALE) / group->sgp->power;
 
                if (local_group) {
                        this_load = avg_load;
@@ -1920,8 +1903,8 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
        if (!sched_feat(WAKEUP_PREEMPT))
                return;
 
-       update_curr(cfs_rq);
        find_matching_se(&se, &pse);
+       update_curr(cfs_rq_of(se));
        BUG_ON(!pse);
        if (wakeup_preempt_entity(se, pse) == 1) {
                /*
@@ -2230,11 +2213,43 @@ static void update_shares(int cpu)
        struct rq *rq = cpu_rq(cpu);
 
        rcu_read_lock();
+       /*
+        * Iterates the task_group tree in a bottom up fashion, see
+        * list_add_leaf_cfs_rq() for details.
+        */
        for_each_leaf_cfs_rq(rq, cfs_rq)
                update_shares_cpu(cfs_rq->tg, cpu);
        rcu_read_unlock();
 }
 
+/*
+ * Compute the cpu's hierarchical load factor for each task group.
+ * This needs to be done in a top-down fashion because the load of a child
+ * group is a fraction of its parents load.
+ */
+static int tg_load_down(struct task_group *tg, void *data)
+{
+       unsigned long load;
+       long cpu = (long)data;
+
+       if (!tg->parent) {
+               load = cpu_rq(cpu)->load.weight;
+       } else {
+               load = tg->parent->cfs_rq[cpu]->h_load;
+               load *= tg->se[cpu]->load.weight;
+               load /= tg->parent->cfs_rq[cpu]->load.weight + 1;
+       }
+
+       tg->cfs_rq[cpu]->h_load = load;
+
+       return 0;
+}
+
+static void update_h_load(long cpu)
+{
+       walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
+}
+
 static unsigned long
 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
                  unsigned long max_load_move,
@@ -2242,14 +2257,12 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
                  int *all_pinned)
 {
        long rem_load_move = max_load_move;
-       int busiest_cpu = cpu_of(busiest);
-       struct task_group *tg;
+       struct cfs_rq *busiest_cfs_rq;
 
        rcu_read_lock();
-       update_h_load(busiest_cpu);
+       update_h_load(cpu_of(busiest));
 
-       list_for_each_entry_rcu(tg, &task_groups, list) {
-               struct cfs_rq *busiest_cfs_rq = tg->cfs_rq[busiest_cpu];
+       for_each_leaf_cfs_rq(busiest, busiest_cfs_rq) {
                unsigned long busiest_h_load = busiest_cfs_rq->h_load;
                unsigned long busiest_weight = busiest_cfs_rq->load.weight;
                u64 rem_load, moved_load;
@@ -2630,7 +2643,7 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
                power >>= SCHED_POWER_SHIFT;
        }
 
-       sdg->cpu_power_orig = power;
+       sdg->sgp->power_orig = power;
 
        if (sched_feat(ARCH_POWER))
                power *= arch_scale_freq_power(sd, cpu);
@@ -2646,7 +2659,7 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
                power = 1;
 
        cpu_rq(cpu)->cpu_power = power;
-       sdg->cpu_power = power;
+       sdg->sgp->power = power;
 }
 
 static void update_group_power(struct sched_domain *sd, int cpu)
@@ -2664,11 +2677,11 @@ static void update_group_power(struct sched_domain *sd, int cpu)
 
        group = child->groups;
        do {
-               power += group->cpu_power;
+               power += group->sgp->power;
                group = group->next;
        } while (group != child->groups);
 
-       sdg->cpu_power = power;
+       sdg->sgp->power = power;
 }
 
 /*
@@ -2690,7 +2703,7 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
        /*
         * If ~90% of the cpu_power is still there, we're good.
         */
-       if (group->cpu_power * 32 > group->cpu_power_orig * 29)
+       if (group->sgp->power * 32 > group->sgp->power_orig * 29)
                return 1;
 
        return 0;
@@ -2770,7 +2783,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
        }
 
        /* Adjust by relative CPU power of the group */
-       sgs->avg_load = (sgs->group_load*SCHED_POWER_SCALE) / group->cpu_power;
+       sgs->avg_load = (sgs->group_load*SCHED_POWER_SCALE) / group->sgp->power;
 
        /*
         * Consider the group unbalanced when the imbalance is larger
@@ -2787,7 +2800,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd,
        if ((max_cpu_load - min_cpu_load) >= avg_load_per_task && max_nr_running > 1)
                sgs->group_imb = 1;
 
-       sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power,
+       sgs->group_capacity = DIV_ROUND_CLOSEST(group->sgp->power,
                                                SCHED_POWER_SCALE);
        if (!sgs->group_capacity)
                sgs->group_capacity = fix_small_capacity(sd, group);
@@ -2876,7 +2889,7 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
                        return;
 
                sds->total_load += sgs.group_load;
-               sds->total_pwr += sg->cpu_power;
+               sds->total_pwr += sg->sgp->power;
 
                /*
                 * In case the child domain prefers tasks go to siblings
@@ -2961,7 +2974,7 @@ static int check_asym_packing(struct sched_domain *sd,
        if (this_cpu > busiest_cpu)
                return 0;
 
-       *imbalance = DIV_ROUND_CLOSEST(sds->max_load * sds->busiest->cpu_power,
+       *imbalance = DIV_ROUND_CLOSEST(sds->max_load * sds->busiest->sgp->power,
                                       SCHED_POWER_SCALE);
        return 1;
 }
@@ -2992,7 +3005,7 @@ static inline void fix_small_imbalance(struct sd_lb_stats *sds,
 
        scaled_busy_load_per_task = sds->busiest_load_per_task
                                         * SCHED_POWER_SCALE;
-       scaled_busy_load_per_task /= sds->busiest->cpu_power;
+       scaled_busy_load_per_task /= sds->busiest->sgp->power;
 
        if (sds->max_load - sds->this_load + scaled_busy_load_per_task >=
                        (scaled_busy_load_per_task * imbn)) {
@@ -3006,28 +3019,28 @@ static inline void fix_small_imbalance(struct sd_lb_stats *sds,
         * moving them.
         */
 
-       pwr_now += sds->busiest->cpu_power *
+       pwr_now += sds->busiest->sgp->power *
                        min(sds->busiest_load_per_task, sds->max_load);
-       pwr_now += sds->this->cpu_power *
+       pwr_now += sds->this->sgp->power *
                        min(sds->this_load_per_task, sds->this_load);
        pwr_now /= SCHED_POWER_SCALE;
 
        /* Amount of load we'd subtract */
        tmp = (sds->busiest_load_per_task * SCHED_POWER_SCALE) /
-               sds->busiest->cpu_power;
+               sds->busiest->sgp->power;
        if (sds->max_load > tmp)
-               pwr_move += sds->busiest->cpu_power *
+               pwr_move += sds->busiest->sgp->power *
                        min(sds->busiest_load_per_task, sds->max_load - tmp);
 
        /* Amount of load we'd add */
-       if (sds->max_load * sds->busiest->cpu_power <
+       if (sds->max_load * sds->busiest->sgp->power <
                sds->busiest_load_per_task * SCHED_POWER_SCALE)
-               tmp = (sds->max_load * sds->busiest->cpu_power) /
-                       sds->this->cpu_power;
+               tmp = (sds->max_load * sds->busiest->sgp->power) /
+                       sds->this->sgp->power;
        else
                tmp = (sds->busiest_load_per_task * SCHED_POWER_SCALE) /
-                       sds->this->cpu_power;
-       pwr_move += sds->this->cpu_power *
+                       sds->this->sgp->power;
+       pwr_move += sds->this->sgp->power *
                        min(sds->this_load_per_task, sds->this_load + tmp);
        pwr_move /= SCHED_POWER_SCALE;
 
@@ -3073,7 +3086,7 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu,
 
                load_above_capacity *= (SCHED_LOAD_SCALE * SCHED_POWER_SCALE);
 
-               load_above_capacity /= sds->busiest->cpu_power;
+               load_above_capacity /= sds->busiest->sgp->power;
        }
 
        /*
@@ -3089,8 +3102,8 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu,
        max_pull = min(sds->max_load - sds->avg_load, load_above_capacity);
 
        /* How much load to actually move to equalise the imbalance */
-       *imbalance = min(max_pull * sds->busiest->cpu_power,
-               (sds->avg_load - sds->this_load) * sds->this->cpu_power)
+       *imbalance = min(max_pull * sds->busiest->sgp->power,
+               (sds->avg_load - sds->this_load) * sds->this->sgp->power)
                        / SCHED_POWER_SCALE;
 
        /*