X-Git-Url: https://nv-tegra.nvidia.com/r/gitweb?p=linux-2.6.git;a=blobdiff_plain;f=mm%2Fmemory_hotplug.c;h=c46887b5a11eaa5bbe48008d16ad71bd37f7302f;hp=85eb4d342ac5010ccc0997be9a251d20cb82bb50;hb=6a7ebdf2fd15417e87b4fd02ff411aeaca34da5f;hpb=cf23422b9d76215316855253da491d4c9f294372 diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 85eb4d342ac..c46887b5a11 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -34,6 +34,23 @@ #include "internal.h" +DEFINE_MUTEX(mem_hotplug_mutex); + +void lock_memory_hotplug(void) +{ + mutex_lock(&mem_hotplug_mutex); + + /* for exclusive hibernation if CONFIG_HIBERNATION=y */ + lock_system_sleep(); +} + +void unlock_memory_hotplug(void) +{ + unlock_system_sleep(); + mutex_unlock(&mem_hotplug_mutex); +} + + /* add this memory to iomem resource */ static struct resource *register_memory_resource(u64 start, u64 size) { @@ -65,9 +82,10 @@ static void release_memory_resource(struct resource *res) #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE #ifndef CONFIG_SPARSEMEM_VMEMMAP -static void get_page_bootmem(unsigned long info, struct page *page, int type) +static void get_page_bootmem(unsigned long info, struct page *page, + unsigned long type) { - atomic_set(&page->_mapcount, type); + page->lru.next = (struct list_head *) type; SetPagePrivate(page); set_page_private(page, info); atomic_inc(&page->_count); @@ -77,15 +95,16 @@ static void get_page_bootmem(unsigned long info, struct page *page, int type) * so use __ref to tell modpost not to generate a warning */ void __ref put_page_bootmem(struct page *page) { - int type; + unsigned long type; - type = atomic_read(&page->_mapcount); - BUG_ON(type >= -1); + type = (unsigned long) page->lru.next; + BUG_ON(type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE || + type > MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE); if (atomic_dec_return(&page->_count) == 1) { ClearPagePrivate(page); set_page_private(page, 0); - reset_page_mapcount(page); + INIT_LIST_HEAD(&page->lru); __free_pages_bootmem(page, 0); } @@ -355,10 +374,6 @@ void online_page(struct page *page) totalhigh_pages++; #endif -#ifdef CONFIG_FLATMEM - max_mapnr = max(page_to_pfn(page), max_mapnr); -#endif - ClearPageReserved(page); init_page_count(page); __free_page(page); @@ -381,7 +396,7 @@ static int online_pages_range(unsigned long start_pfn, unsigned long nr_pages, } -int online_pages(unsigned long pfn, unsigned long nr_pages) +int __ref online_pages(unsigned long pfn, unsigned long nr_pages) { unsigned long onlined_pages = 0; struct zone *zone; @@ -390,6 +405,7 @@ int online_pages(unsigned long pfn, unsigned long nr_pages) int ret; struct memory_notify arg; + lock_memory_hotplug(); arg.start_pfn = pfn; arg.nr_pages = nr_pages; arg.status_change_nid = -1; @@ -402,6 +418,7 @@ int online_pages(unsigned long pfn, unsigned long nr_pages) ret = notifier_to_errno(ret); if (ret) { memory_notify(MEM_CANCEL_ONLINE, &arg); + unlock_memory_hotplug(); return ret; } /* @@ -415,38 +432,44 @@ int online_pages(unsigned long pfn, unsigned long nr_pages) * This means the page allocator ignores this zone. * So, zonelist must be updated after online. */ + mutex_lock(&zonelists_mutex); if (!populated_zone(zone)) need_zonelists_rebuild = 1; ret = walk_system_ram_range(pfn, nr_pages, &onlined_pages, online_pages_range); if (ret) { + mutex_unlock(&zonelists_mutex); printk(KERN_DEBUG "online_pages %lx at %lx failed\n", nr_pages, pfn); memory_notify(MEM_CANCEL_ONLINE, &arg); + unlock_memory_hotplug(); return ret; } zone->present_pages += onlined_pages; zone->zone_pgdat->node_present_pages += onlined_pages; + if (need_zonelists_rebuild) + build_all_zonelists(zone); + else + zone_pcp_update(zone); + + mutex_unlock(&zonelists_mutex); + + init_per_zone_wmark_min(); - zone_pcp_update(zone); - setup_per_zone_wmarks(); - calculate_zone_inactive_ratio(zone); if (onlined_pages) { kswapd_run(zone_to_nid(zone)); node_set_state(zone_to_nid(zone), N_HIGH_MEMORY); } - if (need_zonelists_rebuild) - build_all_zonelists(); - else - vm_total_pages = nr_free_pagecache_pages(); + vm_total_pages = nr_free_pagecache_pages(); writeback_set_ratelimit(); if (onlined_pages) memory_notify(MEM_ONLINE, &arg); + unlock_memory_hotplug(); return 0; } @@ -471,6 +494,14 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start) /* init node's zones as empty zones, we don't have any present pages.*/ free_area_init_node(nid, zones_size, start_pfn, zholes_size); + /* + * The node we allocated has no zone fallback lists. For avoiding + * to access not-initialized zonelist, build here. + */ + mutex_lock(&zonelists_mutex); + build_all_zonelists(NULL); + mutex_unlock(&zonelists_mutex); + return pgdat; } @@ -490,9 +521,9 @@ int mem_online_node(int nid) pg_data_t *pgdat; int ret; - lock_system_sleep(); + lock_memory_hotplug(); pgdat = hotadd_new_pgdat(nid, 0); - if (pgdat) { + if (!pgdat) { ret = -ENOMEM; goto out; } @@ -501,7 +532,7 @@ int mem_online_node(int nid) BUG_ON(ret); out: - unlock_system_sleep(); + unlock_memory_hotplug(); return ret; } @@ -513,7 +544,7 @@ int __ref add_memory(int nid, u64 start, u64 size) struct resource *res; int ret; - lock_system_sleep(); + lock_memory_hotplug(); res = register_memory_resource(start, size); ret = -EEXIST; @@ -560,7 +591,7 @@ error: release_memory_resource(res); out: - unlock_system_sleep(); + unlock_memory_hotplug(); return ret; } EXPORT_SYMBOL_GPL(add_memory); @@ -581,45 +612,32 @@ static inline int pageblock_free(struct page *page) /* Return the start of the next active pageblock after a given page */ static struct page *next_active_pageblock(struct page *page) { - int pageblocks_stride; - /* Ensure the starting page is pageblock-aligned */ BUG_ON(page_to_pfn(page) & (pageblock_nr_pages - 1)); - /* Move forward by at least 1 * pageblock_nr_pages */ - pageblocks_stride = 1; - /* If the entire pageblock is free, move to the end of free page */ - if (pageblock_free(page)) - pageblocks_stride += page_order(page) - pageblock_order; + if (pageblock_free(page)) { + int order; + /* be careful. we don't have locks, page_order can be changed.*/ + order = page_order(page); + if ((order < MAX_ORDER) && (order >= pageblock_order)) + return page + (1 << order); + } - return page + (pageblocks_stride * pageblock_nr_pages); + return page + pageblock_nr_pages; } /* Checks if this range of memory is likely to be hot-removable. */ int is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages) { - int type; struct page *page = pfn_to_page(start_pfn); struct page *end_page = page + nr_pages; /* Check the starting page of each pageblock within the range */ for (; page < end_page; page = next_active_pageblock(page)) { - type = get_pageblock_migratetype(page); - - /* - * A pageblock containing MOVABLE or free pages is considered - * removable - */ - if (type != MIGRATE_MOVABLE && !pageblock_free(page)) - return 0; - - /* - * A pageblock starting with a PageReserved page is not - * considered removable. - */ - if (PageReserved(page)) + if (!is_pageblock_removable_nolock(page)) return 0; + cond_resched(); } /* All pageblocks in the memory block are likely to be hot-removable */ @@ -656,7 +674,7 @@ static int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn) * Scanning pfn is much easier than scanning lru list. * Scan pfn from start to end and Find LRU page. */ -int scan_lru_pages(unsigned long start, unsigned long end) +static unsigned long scan_lru_pages(unsigned long start, unsigned long end) { unsigned long pfn; struct page *page; @@ -692,7 +710,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) if (!pfn_valid(pfn)) continue; page = pfn_to_page(pfn); - if (!page_count(page)) + if (!get_page_unless_zero(page)) continue; /* * We can skip free pages. And we can only deal with pages on @@ -700,35 +718,39 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) */ ret = isolate_lru_page(page); if (!ret) { /* Success */ + put_page(page); list_add_tail(&page->lru, &source); move_pages--; inc_zone_page_state(page, NR_ISOLATED_ANON + page_is_file_cache(page)); } else { - /* Becasue we don't have big zone->lock. we should - check this again here. */ - if (page_count(page)) - not_managed++; #ifdef CONFIG_DEBUG_VM printk(KERN_ALERT "removing pfn %lx from LRU failed\n", pfn); dump_page(page); #endif + put_page(page); + /* Because we don't have big zone->lock. we should + check this again here. */ + if (page_count(page)) { + not_managed++; + ret = -EBUSY; + break; + } } } - ret = -EBUSY; - if (not_managed) { - if (!list_empty(&source)) + if (!list_empty(&source)) { + if (not_managed) { + putback_lru_pages(&source); + goto out; + } + /* this function returns # of failed pages */ + ret = migrate_pages(&source, hotremove_migrate_alloc, 0, + true, true); + if (ret) putback_lru_pages(&source); - goto out; } - ret = 0; - if (list_empty(&source)) - goto out; - /* this function returns # of failed pages */ - ret = migrate_pages(&source, hotremove_migrate_alloc, 0, 1); - out: return ret; } @@ -780,7 +802,7 @@ check_pages_isolated(unsigned long start_pfn, unsigned long end_pfn) return offlined; } -static int offline_pages(unsigned long start_pfn, +static int __ref offline_pages(unsigned long start_pfn, unsigned long end_pfn, unsigned long timeout) { unsigned long pfn, nr_pages, expire; @@ -800,7 +822,7 @@ static int offline_pages(unsigned long start_pfn, if (!test_pages_in_a_zone(start_pfn, end_pfn)) return -EINVAL; - lock_system_sleep(); + lock_memory_hotplug(); zone = page_zone(pfn_to_page(start_pfn)); node = zone_to_nid(zone); @@ -837,7 +859,6 @@ repeat: ret = 0; if (drain) { lru_add_drain_all(); - flush_scheduled_work(); cond_resched(); drain_all_pages(); } @@ -859,7 +880,6 @@ repeat: } /* drain all zone's lru pagevec, this is asyncronous... */ lru_add_drain_all(); - flush_scheduled_work(); yield(); /* drain pcp pages , this is synchrouns. */ drain_all_pages(); @@ -880,8 +900,8 @@ repeat: zone->zone_pgdat->node_present_pages -= offlined_pages; totalram_pages -= offlined_pages; - setup_per_zone_wmarks(); - calculate_zone_inactive_ratio(zone); + init_per_zone_wmark_min(); + if (!node_present_pages(node)) { node_clear_state(node, N_HIGH_MEMORY); kswapd_stop(node); @@ -891,7 +911,7 @@ repeat: writeback_set_ratelimit(); memory_notify(MEM_OFFLINE, &arg); - unlock_system_sleep(); + unlock_memory_hotplug(); return 0; failed_removal: @@ -902,7 +922,7 @@ failed_removal: undo_isolate_page_range(start_pfn, end_pfn); out: - unlock_system_sleep(); + unlock_memory_hotplug(); return ret; }