memcg: clean up waiting move acct
KAMEZAWA Hiroyuki [Wed, 11 Aug 2010 01:02:58 +0000 (18:02 -0700)]
Now, for checking a memcg is under task-account-moving, we do css_tryget()
against mc.to and mc.from.  But this is just complicating things.  This
patch makes the check easier.

This patch adds a spinlock to move_charge_struct and guard modification of
mc.to and mc.from.  By this, we don't have to think about complicated
races arount this not-critical path.

[balbir@linux.vnet.ibm.com: don't crash on a null memcg being passed]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

mm/memcontrol.c

index 991860e..27981e7 100644 (file)
@@ -268,6 +268,7 @@ enum move_type {
 
 /* "mc" and its members are protected by cgroup_mutex */
 static struct move_charge_struct {
+       spinlock_t        lock; /* for from, to, moving_task */
        struct mem_cgroup *from;
        struct mem_cgroup *to;
        unsigned long precharge;
@@ -276,6 +277,7 @@ static struct move_charge_struct {
        struct task_struct *moving_task;        /* a task moving charges */
        wait_queue_head_t waitq;                /* a waitq for other context */
 } mc = {
+       .lock = __SPIN_LOCK_UNLOCKED(mc.lock),
        .waitq = __WAIT_QUEUE_HEAD_INITIALIZER(mc.waitq),
 };
 
@@ -1051,26 +1053,24 @@ static unsigned int get_swappiness(struct mem_cgroup *memcg)
 
 static bool mem_cgroup_under_move(struct mem_cgroup *mem)
 {
-       struct mem_cgroup *from = mc.from;
-       struct mem_cgroup *to = mc.to;
+       struct mem_cgroup *from;
+       struct mem_cgroup *to;
        bool ret = false;
-
-       if (from == mem || to == mem)
-               return true;
-
-       if (!from || !to || !mem->use_hierarchy)
-               return false;
-
-       rcu_read_lock();
-       if (css_tryget(&from->css)) {
-               ret = css_is_ancestor(&from->css, &mem->css);
-               css_put(&from->css);
-       }
-       if (!ret && css_tryget(&to->css)) {
-               ret = css_is_ancestor(&to->css, &mem->css);
-               css_put(&to->css);
-       }
-       rcu_read_unlock();
+       /*
+        * Unlike task_move routines, we access mc.to, mc.from not under
+        * mutual exclusion by cgroup_mutex. Here, we take spinlock instead.
+        */
+       spin_lock(&mc.lock);
+       from = mc.from;
+       to = mc.to;
+       if (!from)
+               goto unlock;
+       if (from == mem || to == mem
+           || (mem->use_hierarchy && css_is_ancestor(&from->css, &mem->css))
+           || (mem->use_hierarchy && css_is_ancestor(&to->css, &mem->css)))
+               ret = true;
+unlock:
+       spin_unlock(&mc.lock);
        return ret;
 }
 
@@ -1406,7 +1406,7 @@ static void memcg_wakeup_oom(struct mem_cgroup *mem)
 
 static void memcg_oom_recover(struct mem_cgroup *mem)
 {
-       if (atomic_read(&mem->oom_lock))
+       if (mem && atomic_read(&mem->oom_lock))
                memcg_wakeup_oom(mem);
 }
 
@@ -4441,11 +4441,13 @@ static int mem_cgroup_precharge_mc(struct mm_struct *mm)
 
 static void mem_cgroup_clear_mc(void)
 {
+       struct mem_cgroup *from = mc.from;
+       struct mem_cgroup *to = mc.to;
+
        /* we must uncharge all the leftover precharges from mc.to */
        if (mc.precharge) {
                __mem_cgroup_cancel_charge(mc.to, mc.precharge);
                mc.precharge = 0;
-               memcg_oom_recover(mc.to);
        }
        /*
         * we didn't uncharge from mc.from at mem_cgroup_move_account(), so
@@ -4454,7 +4456,6 @@ static void mem_cgroup_clear_mc(void)
        if (mc.moved_charge) {
                __mem_cgroup_cancel_charge(mc.from, mc.moved_charge);
                mc.moved_charge = 0;
-               memcg_oom_recover(mc.from);
        }
        /* we must fixup refcnts and charges */
        if (mc.moved_swap) {
@@ -4479,9 +4480,13 @@ static void mem_cgroup_clear_mc(void)
 
                mc.moved_swap = 0;
        }
+       spin_lock(&mc.lock);
        mc.from = NULL;
        mc.to = NULL;
        mc.moving_task = NULL;
+       spin_unlock(&mc.lock);
+       memcg_oom_recover(from);
+       memcg_oom_recover(to);
        wake_up_all(&mc.waitq);
 }
 
@@ -4510,12 +4515,14 @@ static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
                        VM_BUG_ON(mc.moved_charge);
                        VM_BUG_ON(mc.moved_swap);
                        VM_BUG_ON(mc.moving_task);
+                       spin_lock(&mc.lock);
                        mc.from = from;
                        mc.to = mem;
                        mc.precharge = 0;
                        mc.moved_charge = 0;
                        mc.moved_swap = 0;
                        mc.moving_task = current;
+                       spin_unlock(&mc.lock);
 
                        ret = mem_cgroup_precharge_mc(mm);
                        if (ret)