cgroup_freezer: convert to the new freezer interface and cgroup_taskset attach API
[linux-2.6.git] / kernel / cgroup_freezer.c
index 5e6d26b..5c248e5 100644 (file)
@@ -14,7 +14,8 @@
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  */
 
-#include <linux/module.h>
+#include <linux/export.h>
+#include <linux/slab.h>
 #include <linux/cgroup.h>
 #include <linux/fs.h>
 #include <linux/uaccess.h>
@@ -47,17 +48,17 @@ static inline struct freezer *task_freezer(struct task_struct *task)
                            struct freezer, css);
 }
 
-int cgroup_frozen(struct task_struct *task)
+bool cgroup_freezing(struct task_struct *task)
 {
-       struct freezer *freezer;
        enum freezer_state state;
+       bool ret;
 
-       task_lock(task);
-       freezer = task_freezer(task);
-       state = freezer->state;
-       task_unlock(task);
+       rcu_read_lock();
+       state = task_freezer(task)->state;
+       ret = state == CGROUP_FREEZING || state == CGROUP_FROZEN;
+       rcu_read_unlock();
 
-       return state == CGROUP_FROZEN;
+       return ret;
 }
 
 /*
@@ -85,10 +86,10 @@ struct cgroup_subsys freezer_subsys;
 
 /* Locks taken and their ordering
  * ------------------------------
- * css_set_lock
  * cgroup_mutex (AKA cgroup_lock)
- * task->alloc_lock (AKA task_lock)
  * freezer->lock
+ * css_set_lock
+ * task->alloc_lock (AKA task_lock)
  * task->sighand->siglock
  *
  * cgroup code forces css_set_lock to be taken before task->alloc_lock
@@ -96,37 +97,38 @@ struct cgroup_subsys freezer_subsys;
  * freezer_create(), freezer_destroy():
  * cgroup_mutex [ by cgroup core ]
  *
- * can_attach():
- * cgroup_mutex
- *
- * cgroup_frozen():
- * task->alloc_lock (to get task's cgroup)
+ * freezer_can_attach():
+ * cgroup_mutex (held by caller of can_attach)
  *
  * freezer_fork() (preserving fork() performance means can't take cgroup_mutex):
- * task->alloc_lock (to get task's cgroup)
  * freezer->lock
  *  sighand->siglock (if the cgroup is freezing)
  *
  * freezer_read():
  * cgroup_mutex
  *  freezer->lock
+ *   write_lock css_set_lock (cgroup iterator start)
+ *    task->alloc_lock
  *   read_lock css_set_lock (cgroup iterator start)
  *
  * freezer_write() (freeze):
  * cgroup_mutex
  *  freezer->lock
+ *   write_lock css_set_lock (cgroup iterator start)
+ *    task->alloc_lock
  *   read_lock css_set_lock (cgroup iterator start)
- *    sighand->siglock
+ *    sighand->siglock (fake signal delivery inside freeze_task())
  *
  * freezer_write() (unfreeze):
  * cgroup_mutex
  *  freezer->lock
+ *   write_lock css_set_lock (cgroup iterator start)
+ *    task->alloc_lock
  *   read_lock css_set_lock (cgroup iterator start)
- *    task->alloc_lock (to prevent races with freeze_task())
+ *    task->alloc_lock (inside __thaw_task(), prevents race with refrigerator())
  *     sighand->siglock
  */
-static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss,
-                                                 struct cgroup *cgroup)
+static struct cgroup_subsys_state *freezer_create(struct cgroup *cgroup)
 {
        struct freezer *freezer;
 
@@ -139,13 +141,16 @@ static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss,
        return &freezer->css;
 }
 
-static void freezer_destroy(struct cgroup_subsys *ss,
-                           struct cgroup *cgroup)
+static void freezer_destroy(struct cgroup *cgroup)
 {
-       kfree(cgroup_freezer(cgroup));
+       struct freezer *freezer = cgroup_freezer(cgroup);
+
+       if (freezer->state != CGROUP_THAWED)
+               atomic_dec(&system_freezing_cnt);
+       kfree(freezer);
 }
 
-/* Task is frozen or will freeze immediately when next it gets woken */
+/* task is frozen or will freeze immediately when next it gets woken */
 static bool is_task_frozen_enough(struct task_struct *task)
 {
        return frozen(task) ||
@@ -157,74 +162,92 @@ static bool is_task_frozen_enough(struct task_struct *task)
  * a write to that file racing against an attach, and hence the
  * can_attach() result will remain valid until the attach completes.
  */
-static int freezer_can_attach(struct cgroup_subsys *ss,
-                             struct cgroup *new_cgroup,
-                             struct task_struct *task)
+static int freezer_can_attach(struct cgroup *new_cgroup,
+                             struct cgroup_taskset *tset)
 {
        struct freezer *freezer;
+       struct task_struct *task;
 
        /*
         * Anything frozen can't move or be moved to/from.
-        *
-        * Since orig_freezer->state == FROZEN means that @task has been
-        * frozen, so it's sufficient to check the latter condition.
         */
-
-       if (is_task_frozen_enough(task))
-               return -EBUSY;
+       cgroup_taskset_for_each(task, new_cgroup, tset)
+               if (cgroup_freezing(task))
+                       return -EBUSY;
 
        freezer = cgroup_freezer(new_cgroup);
-       if (freezer->state == CGROUP_FROZEN)
+       if (freezer->state != CGROUP_THAWED)
                return -EBUSY;
 
        return 0;
 }
 
-static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task)
+static void freezer_fork(struct task_struct *task)
 {
        struct freezer *freezer;
 
-       task_lock(task);
+       /*
+        * No lock is needed, since the task isn't on tasklist yet,
+        * so it can't be moved to another cgroup, which means the
+        * freezer won't be removed and will be valid during this
+        * function call.  Nevertheless, apply RCU read-side critical
+        * section to suppress RCU lockdep false positives.
+        */
+       rcu_read_lock();
        freezer = task_freezer(task);
-       task_unlock(task);
+       rcu_read_unlock();
+
+       /*
+        * The root cgroup is non-freezable, so we can skip the
+        * following check.
+        */
+       if (!freezer->css.cgroup->parent)
+               return;
 
        spin_lock_irq(&freezer->lock);
        BUG_ON(freezer->state == CGROUP_FROZEN);
 
        /* Locking avoids race with FREEZING -> THAWED transitions. */
        if (freezer->state == CGROUP_FREEZING)
-               freeze_task(task, true);
+               freeze_task(task);
        spin_unlock_irq(&freezer->lock);
 }
 
 /*
  * caller must hold freezer->lock
  */
-static void update_freezer_state(struct cgroup *cgroup,
+static void update_if_frozen(struct cgroup *cgroup,
                                 struct freezer *freezer)
 {
        struct cgroup_iter it;
        struct task_struct *task;
        unsigned int nfrozen = 0, ntotal = 0;
+       enum freezer_state old_state = freezer->state;
 
        cgroup_iter_start(cgroup, &it);
        while ((task = cgroup_iter_next(cgroup, &it))) {
                ntotal++;
-               if (is_task_frozen_enough(task))
+               if (freezing(task) && is_task_frozen_enough(task))
                        nfrozen++;
        }
 
-       /*
-        * Transition to FROZEN when no new tasks can be added ensures
-        * that we never exist in the FROZEN state while there are unfrozen
-        * tasks.
-        */
-       if (nfrozen == ntotal)
-               freezer->state = CGROUP_FROZEN;
-       else if (nfrozen > 0)
-               freezer->state = CGROUP_FREEZING;
-       else
-               freezer->state = CGROUP_THAWED;
+       if (old_state == CGROUP_THAWED) {
+               BUG_ON(nfrozen > 0);
+       } else if (old_state == CGROUP_FREEZING) {
+               if (nfrozen == ntotal)
+                       freezer->state = CGROUP_FROZEN;
+       } else { /* old_state == CGROUP_FROZEN */
+               BUG_ON(nfrozen != ntotal);
+       }
+
        cgroup_iter_end(cgroup, &it);
 }
 
@@ -243,7 +266,7 @@ static int freezer_read(struct cgroup *cgroup, struct cftype *cft,
        if (state == CGROUP_FREEZING) {
                /* We change from FREEZING to FROZEN lazily if the cgroup was
                 * only partially frozen when we exitted write. */
-               update_freezer_state(cgroup, freezer);
+               update_if_frozen(cgroup, freezer);
                state = freezer->state;
        }
        spin_unlock_irq(&freezer->lock);
@@ -260,10 +283,9 @@ static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
        struct task_struct *task;
        unsigned int num_cant_freeze_now = 0;
 
-       freezer->state = CGROUP_FREEZING;
        cgroup_iter_start(cgroup, &it);
        while ((task = cgroup_iter_next(cgroup, &it))) {
-               if (!freeze_task(task, true))
+               if (!freeze_task(task))
                        continue;
                if (is_task_frozen_enough(task))
                        continue;
@@ -281,12 +303,9 @@ static void unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
        struct task_struct *task;
 
        cgroup_iter_start(cgroup, &it);
-       while ((task = cgroup_iter_next(cgroup, &it))) {
-               thaw_process(task);
-       }
+       while ((task = cgroup_iter_next(cgroup, &it)))
+               __thaw_task(task);
        cgroup_iter_end(cgroup, &it);
-
-       freezer->state = CGROUP_THAWED;
 }
 
 static int freezer_change_state(struct cgroup *cgroup,
@@ -296,29 +315,28 @@ static int freezer_change_state(struct cgroup *cgroup,
        int retval = 0;
 
        freezer = cgroup_freezer(cgroup);
+
        spin_lock_irq(&freezer->lock);
-       update_freezer_state(cgroup, freezer);
-       if (goal_state == freezer->state)
-               goto out;
-       switch (freezer->state) {
+
+       update_if_frozen(cgroup, freezer);
+
+       switch (goal_state) {
        case CGROUP_THAWED:
-               retval = try_to_freeze_cgroup(cgroup, freezer);
+               if (freezer->state != CGROUP_THAWED)
+                       atomic_dec(&system_freezing_cnt);
+               freezer->state = CGROUP_THAWED;
+               unfreeze_cgroup(cgroup, freezer);
                break;
-       case CGROUP_FREEZING:
-               if (goal_state == CGROUP_FROZEN) {
-                       /* Userspace is retrying after
-                        * "/bin/echo FROZEN > freezer.state" returned -EBUSY */
-                       retval = try_to_freeze_cgroup(cgroup, freezer);
-                       break;
-               }
-               /* state == FREEZING and goal_state == THAWED, so unfreeze */
        case CGROUP_FROZEN:
-               unfreeze_cgroup(cgroup, freezer);
+               if (freezer->state == CGROUP_THAWED)
+                       atomic_inc(&system_freezing_cnt);
+               freezer->state = CGROUP_FREEZING;
+               retval = try_to_freeze_cgroup(cgroup, freezer);
                break;
        default:
-               break;
+               BUG();
        }
-out:
+
        spin_unlock_irq(&freezer->lock);
 
        return retval;
@@ -336,7 +354,7 @@ static int freezer_write(struct cgroup *cgroup,
        else if (strcmp(buffer, freezer_state_strs[CGROUP_FROZEN]) == 0)
                goal_state = CGROUP_FROZEN;
        else
-               return -EIO;
+               return -EINVAL;
 
        if (!cgroup_lock_live_group(cgroup))
                return -ENODEV;
@@ -355,6 +373,8 @@ static struct cftype files[] = {
 
 static int freezer_populate(struct cgroup_subsys *ss, struct cgroup *cgroup)
 {
+       if (!cgroup->parent)
+               return 0;
        return cgroup_add_files(cgroup, ss, files, ARRAY_SIZE(files));
 }
 
@@ -365,7 +385,5 @@ struct cgroup_subsys freezer_subsys = {
        .populate       = freezer_populate,
        .subsys_id      = freezer_subsys_id,
        .can_attach     = freezer_can_attach,
-       .attach         = NULL,
        .fork           = freezer_fork,
-       .exit           = NULL,
 };