memcg: fix mem_cgroup_shrink_usage()
Daisuke Nishimura [Thu, 30 Apr 2009 22:08:19 +0000 (15:08 -0700)]
Current mem_cgroup_shrink_usage() has two problems.

1. It doesn't call mem_cgroup_out_of_memory and doesn't update
   last_oom_jiffies, so pagefault_out_of_memory invokes global OOM.

2. When hierarchy is considered, shrinking has to be done from
   mem_over_limit, not from the memcg which the page would be charged to.

mem_cgroup_try_charge_swapin() does all of these things properly, so we
use it and call mem_cgroup_cancel_charge_swapin() when it succeeds.

The name "shrink_usage" is not appropriate for this behavior, so we
rename the function to mem_cgroup_shmem_charge_fallback() too.

Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Li Zefan <lizf@cn.fujitsu.cn>
Cc: Paul Menage <menage@google.com>
Cc: Dhaval Giani <dhaval@linux.vnet.ibm.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

include/linux/memcontrol.h
mm/memcontrol.c
mm/shmem.c

index a9e3b76..25b9ca9 100644 (file)
@@ -56,7 +56,7 @@ extern void mem_cgroup_move_lists(struct page *page,
                                  enum lru_list from, enum lru_list to);
 extern void mem_cgroup_uncharge_page(struct page *page);
 extern void mem_cgroup_uncharge_cache_page(struct page *page);
-extern int mem_cgroup_shrink_usage(struct page *page,
+extern int mem_cgroup_shmem_charge_fallback(struct page *page,
                        struct mm_struct *mm, gfp_t gfp_mask);
 
 extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
@@ -155,7 +155,7 @@ static inline void mem_cgroup_uncharge_cache_page(struct page *page)
 {
 }
 
-static inline int mem_cgroup_shrink_usage(struct page *page,
+static inline int mem_cgroup_shmem_charge_fallback(struct page *page,
                        struct mm_struct *mm, gfp_t gfp_mask)
 {
        return 0;
index 575203a..01c2d8f 100644 (file)
@@ -1617,37 +1617,28 @@ void mem_cgroup_end_migration(struct mem_cgroup *mem,
 }
 
 /*
- * A call to try to shrink memory usage under specified resource controller.
- * This is typically used for page reclaiming for shmem for reducing side
- * effect of page allocation from shmem, which is used by some mem_cgroup.
+ * A call to try to shrink memory usage on charge failure at shmem's swapin.
+ * Calling hierarchical_reclaim is not enough because we should update
+ * last_oom_jiffies to prevent pagefault_out_of_memory from invoking global OOM.
+ * Moreover considering hierarchy, we should reclaim from the mem_over_limit,
+ * not from the memcg which this page would be charged to.
+ * try_charge_swapin does all of these works properly.
  */
-int mem_cgroup_shrink_usage(struct page *page,
+int mem_cgroup_shmem_charge_fallback(struct page *page,
                            struct mm_struct *mm,
                            gfp_t gfp_mask)
 {
        struct mem_cgroup *mem = NULL;
-       int progress = 0;
-       int retry = MEM_CGROUP_RECLAIM_RETRIES;
+       int ret;
 
        if (mem_cgroup_disabled())
                return 0;
-       if (page)
-               mem = try_get_mem_cgroup_from_swapcache(page);
-       if (!mem && mm)
-               mem = try_get_mem_cgroup_from_mm(mm);
-       if (unlikely(!mem))
-               return 0;
 
-       do {
-               progress = mem_cgroup_hierarchical_reclaim(mem,
-                                       gfp_mask, true, false);
-               progress += mem_cgroup_check_under_limit(mem);
-       } while (!progress && --retry);
+       ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &mem);
+       if (!ret)
+               mem_cgroup_cancel_charge_swapin(mem); /* it does !mem check */
 
-       css_put(&mem->css);
-       if (!retry)
-               return -ENOMEM;
-       return 0;
+       return ret;
 }
 
 static DEFINE_MUTEX(set_limit_mutex);
index f9cb20e..b25f95c 100644 (file)
@@ -1340,8 +1340,12 @@ repeat:
                        shmem_swp_unmap(entry);
                        spin_unlock(&info->lock);
                        if (error == -ENOMEM) {
-                               /* allow reclaim from this memory cgroup */
-                               error = mem_cgroup_shrink_usage(swappage,
+                               /*
+                                * reclaim from proper memory cgroup and
+                                * call memcg's OOM if needed.
+                                */
+                               error = mem_cgroup_shmem_charge_fallback(
+                                                               swappage,
                                                                current->mm,
                                                                gfp);
                                if (error) {