memcg: memory hotplug fix for notifier callback
KAMEZAWA Hiroyuki [Mon, 1 Dec 2008 21:13:48 +0000 (13:13 -0800)]
Fixes for memcg/memory hotplug.

When memory hotplug allocates/frees memmap, page_cgroup is not freed at
OFFLINE if it was allocated via bootmem.
(Because freeing bootmem requires special care.)

Then, if page_cgroup is allocated by bootmem and memmap is freed/allocated
by memory hotplug, page_cgroup->page == page is no longer true.

But the current MEM_ONLINE handler doesn't check for this, and doesn't
update page_cgroup->page when it is not necessary to allocate page_cgroup.
(This was not found because memmap is not freed if SPARSEMEM_VMEMMAP is y.)

And I noticed that MEM_ONLINE can be called against "part of a section".
So, freeing page_cgroup at CANCEL_ONLINE will cause trouble (it would free
a page_cgroup that is still in use).  Don't roll back at CANCEL.

One more: the current memory hotplug notifier chain is stopped by slub
because it sets NOTIFY_STOP_MASK in its return value.  So, page_cgroup's
callback is never called.  (It has lower priority than slub now.)

I think this slub behavior is not intentional (a BUG), and this patch fixes it.

Another approach to page_cgroup allocation that could be considered:
  - free page_cgroup at OFFLINE even if it's from bootmem
    and remove the special handler. But it requires more changes.

Addresses http://bugzilla.kernel.org/show_bug.cgi?id=12041

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Tested-by: Badari Pulavarty <pbadari@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

mm/page_cgroup.c
mm/slub.c

index 436c002..0b3cbf0 100644 (file)
@@ -107,19 +107,29 @@ int __init_refok init_section_page_cgroup(unsigned long pfn)
 
        section = __pfn_to_section(pfn);
 
-       if (section->page_cgroup)
-               return 0;
-
-       nid = page_to_nid(pfn_to_page(pfn));
-
-       table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
-       if (slab_is_available()) {
-               base = kmalloc_node(table_size, GFP_KERNEL, nid);
-               if (!base)
-                       base = vmalloc_node(table_size, nid);
-       } else {
-               base = __alloc_bootmem_node_nopanic(NODE_DATA(nid), table_size,
+       if (!section->page_cgroup) {
+               nid = page_to_nid(pfn_to_page(pfn));
+               table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
+               if (slab_is_available()) {
+                       base = kmalloc_node(table_size, GFP_KERNEL, nid);
+                       if (!base)
+                               base = vmalloc_node(table_size, nid);
+               } else {
+                       base = __alloc_bootmem_node_nopanic(NODE_DATA(nid),
+                               table_size,
                                PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
+               }
+       } else {
+               /*
+                * We don't have to allocate page_cgroup again, but
+                * address of memmap may be changed. So, we have to initialize
+                * again.
+                */
+               base = section->page_cgroup + pfn;
+               table_size = 0;
+               /* check address of memmap is changed or not. */
+               if (base->page == pfn_to_page(pfn))
+                       return 0;
        }
 
        if (!base) {
@@ -208,18 +218,23 @@ static int __meminit page_cgroup_callback(struct notifier_block *self,
                ret = online_page_cgroup(mn->start_pfn,
                                   mn->nr_pages, mn->status_change_nid);
                break;
-       case MEM_CANCEL_ONLINE:
        case MEM_OFFLINE:
                offline_page_cgroup(mn->start_pfn,
                                mn->nr_pages, mn->status_change_nid);
                break;
+       case MEM_CANCEL_ONLINE:
        case MEM_GOING_OFFLINE:
                break;
        case MEM_ONLINE:
        case MEM_CANCEL_OFFLINE:
                break;
        }
-       ret = notifier_from_errno(ret);
+
+       if (ret)
+               ret = notifier_from_errno(ret);
+       else
+               ret = NOTIFY_OK;
+
        return ret;
 }
 
index 7ad489a..749588a 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2931,8 +2931,10 @@ static int slab_memory_callback(struct notifier_block *self,
        case MEM_CANCEL_OFFLINE:
                break;
        }
-
-       ret = notifier_from_errno(ret);
+       if (ret)
+               ret = notifier_from_errno(ret);
+       else
+               ret = NOTIFY_OK;
        return ret;
 }