futex: Fix uninterruptible loop due to gate_area
[linux-2.6.git] / kernel / cpuset.c
index 2bb8c2e..10131fd 100644 (file)
@@ -55,7 +55,7 @@
 #include <linux/sort.h>
 
 #include <asm/uaccess.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
 #include <linux/mutex.h>
 #include <linux/workqueue.h>
 #include <linux/cgroup.h>
@@ -1367,14 +1367,10 @@ static int fmeter_getrate(struct fmeter *fmp)
        return val;
 }
 
-/* Protected by cgroup_lock */
-static cpumask_var_t cpus_attach;
-
 /* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */
 static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont,
-                            struct task_struct *tsk, bool threadgroup)
+                            struct task_struct *tsk)
 {
-       int ret;
        struct cpuset *cs = cgroup_cs(cont);
 
        if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
@@ -1391,29 +1387,42 @@ static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont,
        if (tsk->flags & PF_THREAD_BOUND)
                return -EINVAL;
 
-       ret = security_task_setscheduler(tsk);
-       if (ret)
-               return ret;
-       if (threadgroup) {
-               struct task_struct *c;
-
-               rcu_read_lock();
-               list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
-                       ret = security_task_setscheduler(c);
-                       if (ret) {
-                               rcu_read_unlock();
-                               return ret;
-                       }
-               }
-               rcu_read_unlock();
-       }
        return 0;
 }
 
-static void cpuset_attach_task(struct task_struct *tsk, nodemask_t *to,
-                              struct cpuset *cs)
+static int cpuset_can_attach_task(struct cgroup *cgrp, struct task_struct *task)
+{
+       return security_task_setscheduler(task);
+}
+
+/*
+ * Protected by cgroup_lock. The nodemasks must be stored globally because
+ * dynamically allocating them is not allowed in pre_attach, and they must
+ * persist among pre_attach, attach_task, and attach.
+ */
+static cpumask_var_t cpus_attach;
+static nodemask_t cpuset_attach_nodemask_from;
+static nodemask_t cpuset_attach_nodemask_to;
+
+/* Set-up work for before attaching each task. */
+static void cpuset_pre_attach(struct cgroup *cont)
+{
+       struct cpuset *cs = cgroup_cs(cont);
+
+       if (cs == &top_cpuset)
+               cpumask_copy(cpus_attach, cpu_possible_mask);
+       else
+               guarantee_online_cpus(cs, cpus_attach);
+
+       guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
+}
+
+/* Per-thread attachment work. */
+static void cpuset_attach_task(struct cgroup *cont, struct task_struct *tsk)
 {
        int err;
+       struct cpuset *cs = cgroup_cs(cont);
+
        /*
         * can_attach beforehand should guarantee that this doesn't fail.
         * TODO: have a better way to handle failure here
@@ -1421,45 +1430,29 @@ static void cpuset_attach_task(struct task_struct *tsk, nodemask_t *to,
        err = set_cpus_allowed_ptr(tsk, cpus_attach);
        WARN_ON_ONCE(err);
 
-       cpuset_change_task_nodemask(tsk, to);
+       cpuset_change_task_nodemask(tsk, &cpuset_attach_nodemask_to);
        cpuset_update_task_spread_flag(cs, tsk);
-
 }
 
 static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont,
-                         struct cgroup *oldcont, struct task_struct *tsk,
-                         bool threadgroup)
+                         struct cgroup *oldcont, struct task_struct *tsk)
 {
        struct mm_struct *mm;
        struct cpuset *cs = cgroup_cs(cont);
        struct cpuset *oldcs = cgroup_cs(oldcont);
-       static nodemask_t to;           /* protected by cgroup_mutex */
-
-       if (cs == &top_cpuset) {
-               cpumask_copy(cpus_attach, cpu_possible_mask);
-       } else {
-               guarantee_online_cpus(cs, cpus_attach);
-       }
-       guarantee_online_mems(cs, &to);
-
-       /* do per-task migration stuff possibly for each in the threadgroup */
-       cpuset_attach_task(tsk, &to, cs);
-       if (threadgroup) {
-               struct task_struct *c;
-               rcu_read_lock();
-               list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
-                       cpuset_attach_task(c, &to, cs);
-               }
-               rcu_read_unlock();
-       }
 
-       /* change mm; only needs to be done once even if threadgroup */
-       to = cs->mems_allowed;
+       /*
+        * Change mm, possibly for multiple threads in a threadgroup. This is
+        * expensive and may sleep.
+        */
+       cpuset_attach_nodemask_from = oldcs->mems_allowed;
+       cpuset_attach_nodemask_to = cs->mems_allowed;
        mm = get_task_mm(tsk);
        if (mm) {
-               mpol_rebind_mm(mm, &to);
+               mpol_rebind_mm(mm, &cpuset_attach_nodemask_to);
                if (is_memory_migrate(cs))
-                       cpuset_migrate_mm(mm, &oldcs->mems_allowed, &to);
+                       cpuset_migrate_mm(mm, &cpuset_attach_nodemask_from,
+                                         &cpuset_attach_nodemask_to);
                mmput(mm);
        }
 }
@@ -1809,10 +1802,9 @@ static int cpuset_populate(struct cgroup_subsys *ss, struct cgroup *cont)
 }
 
 /*
- * post_clone() is called at the end of cgroup_clone().
- * 'cgroup' was just created automatically as a result of
- * a cgroup_clone(), and the current task is about to
- * be moved into 'cgroup'.
+ * post_clone() is called during cgroup_create() when the
+ * clone_children mount argument was specified.  The cgroup
+ * can not yet have any tasks.
  *
  * Currently we refuse to set up the cgroup - thereby
  * refusing the task to be entered, and as a result refusing
@@ -1911,6 +1903,9 @@ struct cgroup_subsys cpuset_subsys = {
        .create = cpuset_create,
        .destroy = cpuset_destroy,
        .can_attach = cpuset_can_attach,
+       .can_attach_task = cpuset_can_attach_task,
+       .pre_attach = cpuset_pre_attach,
+       .attach_task = cpuset_attach_task,
        .attach = cpuset_attach,
        .populate = cpuset_populate,
        .post_clone = cpuset_post_clone,
@@ -2195,7 +2190,7 @@ int cpuset_cpus_allowed_fallback(struct task_struct *tsk)
        rcu_read_lock();
        cs = task_cs(tsk);
        if (cs)
-               cpumask_copy(&tsk->cpus_allowed, cs->cpus_allowed);
+               do_set_cpus_allowed(tsk, cs->cpus_allowed);
        rcu_read_unlock();
 
        /*
@@ -2222,7 +2217,7 @@ int cpuset_cpus_allowed_fallback(struct task_struct *tsk)
                 * Like above we can temporary set any mask and rely on
                 * set_cpus_allowed_ptr() as synchronization point.
                 */
-               cpumask_copy(&tsk->cpus_allowed, cpu_possible_mask);
+               do_set_cpus_allowed(tsk, cpu_possible_mask);
                cpu = cpumask_any(cpu_active_mask);
        }
 
@@ -2465,11 +2460,19 @@ static int cpuset_spread_node(int *rotor)
 
 int cpuset_mem_spread_node(void)
 {
+       if (current->cpuset_mem_spread_rotor == NUMA_NO_NODE)
+               current->cpuset_mem_spread_rotor =
+                       node_random(&current->mems_allowed);
+
        return cpuset_spread_node(&current->cpuset_mem_spread_rotor);
 }
 
 int cpuset_slab_spread_node(void)
 {
+       if (current->cpuset_slab_spread_rotor == NUMA_NO_NODE)
+               current->cpuset_slab_spread_rotor =
+                       node_random(&current->mems_allowed);
+
        return cpuset_spread_node(&current->cpuset_slab_spread_rotor);
 }