#include <linux/hugetlb.h>
#include <linux/gfp.h>
+#include <asm/tlbflush.h>
+
#include "internal.h"
#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
goto out;
pmd = pmd_offset(pud, addr);
+ if (pmd_trans_huge(*pmd))
+ goto out;
if (!pmd_present(*pmd))
goto out;
pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
if (is_write_migration_entry(entry))
pte = pte_mkwrite(pte);
+#ifdef CONFIG_HUGETLB_PAGE
if (PageHuge(new))
pte = pte_mkhuge(pte);
+#endif
flush_cache_page(vma, addr, pte_pfn(pte));
set_pte_at(mm, addr, ptep, pte);
expected_count = 2 + page_has_private(page);
if (page_count(page) != expected_count ||
- (struct page *)radix_tree_deref_slot(pslot) != page) {
+ radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) {
spin_unlock_irq(&mapping->tree_lock);
return -EAGAIN;
}
*/
__dec_zone_page_state(page, NR_FILE_PAGES);
__inc_zone_page_state(newpage, NR_FILE_PAGES);
- if (PageSwapBacked(page)) {
+ if (!PageSwapCache(page) && PageSwapBacked(page)) {
__dec_zone_page_state(page, NR_SHMEM);
__inc_zone_page_state(newpage, NR_SHMEM);
}
expected_count = 2 + page_has_private(page);
if (page_count(page) != expected_count ||
- (struct page *)radix_tree_deref_slot(pslot) != page) {
+ radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) {
spin_unlock_irq(&mapping->tree_lock);
return -EAGAIN;
}
* redo the accounting that clear_page_dirty_for_io undid,
* but we can't use set_page_dirty because that function
* is actually a signal that all of the page has become dirty.
- * Wheras only part of our page may be dirty.
+ * Whereas only part of our page may be dirty.
*/
__set_page_dirty_nobuffers(newpage);
}
.nr_to_write = 1,
.range_start = 0,
.range_end = LLONG_MAX,
- .nonblocking = 1,
.for_reclaim = 1
};
int rc;
* == 0 - success
*/
static int move_to_new_page(struct page *newpage, struct page *page,
- int remap_swapcache)
+ int remap_swapcache, bool sync)
{
struct address_space *mapping;
int rc;
mapping = page_mapping(page);
if (!mapping)
rc = migrate_page(mapping, newpage, page);
- else if (mapping->a_ops->migratepage)
+ else {
/*
- * Most pages have a mapping and most filesystems
- * should provide a migration function. Anonymous
- * pages are part of swap space which also has its
- * own migration function. This is the most common
- * path for page migration.
+ * Do not writeback pages if !sync and migratepage is
+ * not pointing to migrate_page() which is nonblocking
+ * (swapcache/tmpfs uses migratepage = migrate_page).
*/
- rc = mapping->a_ops->migratepage(mapping,
- newpage, page);
- else
- rc = fallback_migrate_page(mapping, newpage, page);
+ if (PageDirty(page) && !sync &&
+ mapping->a_ops->migratepage != migrate_page)
+ rc = -EBUSY;
+ else if (mapping->a_ops->migratepage)
+ /*
+ * Most pages have a mapping and most filesystems
+ * should provide a migration function. Anonymous
+ * pages are part of swap space which also has its
+ * own migration function. This is the most common
+ * path for page migration.
+ */
+ rc = mapping->a_ops->migratepage(mapping,
+ newpage, page);
+ else
+ rc = fallback_migrate_page(mapping, newpage, page);
+ }
if (rc) {
newpage->mapping = NULL;
* to the newly allocated page in newpage.
*/
static int unmap_and_move(new_page_t get_new_page, unsigned long private,
- struct page *page, int force, int offlining)
+ struct page *page, int force, bool offlining, bool sync)
{
int rc = 0;
int *result = NULL;
struct page *newpage = get_new_page(page, private, &result);
int remap_swapcache = 1;
- int rcu_locked = 0;
int charge = 0;
- struct mem_cgroup *mem = NULL;
+ struct mem_cgroup *mem;
struct anon_vma *anon_vma = NULL;
if (!newpage)
/* page was freed from under us. So we are done. */
goto move_newpage;
}
+ if (unlikely(PageTransHuge(page)))
+ if (unlikely(split_huge_page(page)))
+ goto move_newpage;
/* prepare cgroup just returns 0 or -ENOMEM */
rc = -EAGAIN;
if (!trylock_page(page)) {
- if (!force)
+ if (!force || !sync)
+ goto move_newpage;
+
+ /*
+ * It's not safe for direct compaction to call lock_page.
+ * For example, during page readahead pages are added locked
+ * to the LRU. Later, when the IO completes the pages are
+ * marked uptodate and unlocked. However, the queueing
+ * could be merging multiple pages for one bio (e.g.
+ * mpage_readpages). If an allocation happens for the
+ * second or third page, the process can end up locking
+ * the same page twice and deadlocking. Rather than
+ * trying to be clever about what pages can be locked,
+ * avoid the use of lock_page for direct compaction
+ * altogether.
+ */
+ if (current->flags & PF_MEMALLOC)
goto move_newpage;
+
lock_page(page);
}
}
/* charge against new page */
- charge = mem_cgroup_prepare_migration(page, newpage, &mem);
+ charge = mem_cgroup_prepare_migration(page, newpage, &mem, GFP_KERNEL);
if (charge == -ENOMEM) {
rc = -ENOMEM;
goto unlock;
BUG_ON(charge);
if (PageWriteback(page)) {
+ /*
+ * For !sync, there is no point retrying as the retry loop
+ * is expected to be too short for PageWriteback to be cleared
+ */
+ if (!sync) {
+ rc = -EBUSY;
+ goto uncharge;
+ }
if (!force)
goto uncharge;
wait_on_page_writeback(page);
/*
* By try_to_unmap(), page->mapcount goes down to 0 here. In this case,
* we cannot notice that anon_vma is freed while we migrates a page.
- * This rcu_read_lock() delays freeing anon_vma pointer until the end
+ * This get_anon_vma() delays freeing anon_vma pointer until the end
* of migration. File cache pages are no problem because of page_lock()
* File Caches may use write_page() or lock_page() in migration, then,
* just care Anon page here.
*/
if (PageAnon(page)) {
- rcu_read_lock();
- rcu_locked = 1;
-
- /* Determine how to safely use anon_vma */
- if (!page_mapped(page)) {
- if (!PageSwapCache(page))
- goto rcu_unlock;
-
+ /*
+ * Only page_lock_anon_vma() understands the subtleties of
+ * getting a hold on an anon_vma from outside one of its mms.
+ */
+ anon_vma = page_get_anon_vma(page);
+ if (anon_vma) {
+ /*
+ * Anon page
+ */
+ } else if (PageSwapCache(page)) {
/*
* We cannot be sure that the anon_vma of an unmapped
* swapcache page is safe to use because we don't
*/
remap_swapcache = 0;
} else {
- /*
- * Take a reference count on the anon_vma if the
- * page is mapped so that it is guaranteed to
- * exist when the page is remapped later
- */
- anon_vma = page_anon_vma(page);
- get_anon_vma(anon_vma);
+ goto uncharge;
}
}
* free the metadata, so the page can be freed.
*/
if (!page->mapping) {
- if (!PageAnon(page) && page_has_private(page)) {
- /*
- * Go direct to try_to_free_buffers() here because
- * a) that's what try_to_release_page() would do anyway
- * b) we may be under rcu_read_lock() here, so we can't
- * use GFP_KERNEL which is what try_to_release_page()
- * needs to be effective.
- */
+ VM_BUG_ON(PageAnon(page));
+ if (page_has_private(page)) {
try_to_free_buffers(page);
- goto rcu_unlock;
+ goto uncharge;
}
goto skip_unmap;
}
skip_unmap:
if (!page_mapped(page))
- rc = move_to_new_page(newpage, page, remap_swapcache);
+ rc = move_to_new_page(newpage, page, remap_swapcache, sync);
if (rc && remap_swapcache)
remove_migration_ptes(page, page);
-rcu_unlock:
/* Drop an anon_vma reference if we took one */
if (anon_vma)
- drop_anon_vma(anon_vma);
+ put_anon_vma(anon_vma);
- if (rcu_locked)
- rcu_read_unlock();
uncharge:
if (!charge)
- mem_cgroup_end_migration(mem, page, newpage);
+ mem_cgroup_end_migration(mem, page, newpage, rc == 0);
unlock:
unlock_page(page);
+move_newpage:
if (rc != -EAGAIN) {
/*
* A page that has been migrated has all references
putback_lru_page(page);
}
-move_newpage:
-
/*
* Move the new page to the LRU. If migration was not successful
* then this will free the page.
*/
static int unmap_and_move_huge_page(new_page_t get_new_page,
unsigned long private, struct page *hpage,
- int force, int offlining)
+ int force, bool offlining, bool sync)
{
int rc = 0;
int *result = NULL;
struct page *new_hpage = get_new_page(hpage, private, &result);
- int rcu_locked = 0;
struct anon_vma *anon_vma = NULL;
if (!new_hpage)
rc = -EAGAIN;
if (!trylock_page(hpage)) {
- if (!force)
+ if (!force || !sync)
goto out;
lock_page(hpage);
}
- if (PageAnon(hpage)) {
- rcu_read_lock();
- rcu_locked = 1;
-
- if (page_mapped(hpage)) {
- anon_vma = page_anon_vma(hpage);
- atomic_inc(&anon_vma->external_refcount);
- }
- }
+ if (PageAnon(hpage))
+ anon_vma = page_get_anon_vma(hpage);
try_to_unmap(hpage, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
if (!page_mapped(hpage))
- rc = move_to_new_page(new_hpage, hpage, 1);
+ rc = move_to_new_page(new_hpage, hpage, 1, sync);
if (rc)
remove_migration_ptes(hpage, hpage);
- if (anon_vma && atomic_dec_and_lock(&anon_vma->external_refcount,
- &anon_vma->lock)) {
- int empty = list_empty(&anon_vma->head);
- spin_unlock(&anon_vma->lock);
- if (empty)
- anon_vma_free(anon_vma);
- }
-
- if (rcu_locked)
- rcu_read_unlock();
+ if (anon_vma)
+ put_anon_vma(anon_vma);
out:
unlock_page(hpage);
*
* The function returns after 10 attempts or if no pages
* are movable anymore because to has become empty
- * or no retryable pages exist anymore. All pages will be
- * returned to the LRU or freed.
+ * or no retryable pages exist anymore.
+ * Caller should call putback_lru_pages to return pages to the LRU
+ * or free list only if ret != 0.
*
* Return: Number of pages not migrated or error code.
*/
int migrate_pages(struct list_head *from,
- new_page_t get_new_page, unsigned long private, int offlining)
+ new_page_t get_new_page, unsigned long private, bool offlining,
+ bool sync)
{
int retry = 1;
int nr_failed = 0;
cond_resched();
rc = unmap_and_move(get_new_page, private,
- page, pass > 2, offlining);
+ page, pass > 2, offlining,
+ sync);
switch(rc) {
case -ENOMEM:
if (!swapwrite)
current->flags &= ~PF_SWAPWRITE;
- putback_lru_pages(from);
-
if (rc)
return rc;
}
int migrate_huge_pages(struct list_head *from,
- new_page_t get_new_page, unsigned long private, int offlining)
+ new_page_t get_new_page, unsigned long private, bool offlining,
+ bool sync)
{
int retry = 1;
int nr_failed = 0;
cond_resched();
rc = unmap_and_move_huge_page(get_new_page,
- private, page, pass > 2, offlining);
+ private, page, pass > 2, offlining,
+ sync);
switch(rc) {
case -ENOMEM:
}
rc = 0;
out:
-
- list_for_each_entry_safe(page, page2, from, lru)
- put_page(page);
-
if (rc)
return rc;
err = -EFAULT;
vma = find_vma(mm, pp->addr);
- if (!vma || !vma_migratable(vma))
+ if (!vma || pp->addr < vma->vm_start || !vma_migratable(vma))
goto set_status;
- page = follow_page(vma, pp->addr, FOLL_GET);
+ page = follow_page(vma, pp->addr, FOLL_GET|FOLL_SPLIT);
err = PTR_ERR(page);
if (IS_ERR(page))
}
err = 0;
- if (!list_empty(&pagelist))
+ if (!list_empty(&pagelist)) {
err = migrate_pages(&pagelist, new_page_node,
- (unsigned long)pm, 0);
+ (unsigned long)pm, 0, true);
+ if (err)
+ putback_lru_pages(&pagelist);
+ }
up_read(&mm->mmap_sem);
return err;
int err = -EFAULT;
vma = find_vma(mm, addr);
- if (!vma)
+ if (!vma || addr < vma->vm_start)
goto set_status;
page = follow_page(vma, addr, 0);
return -EPERM;
/* Find the mm_struct */
- read_lock(&tasklist_lock);
+ rcu_read_lock();
task = pid ? find_task_by_vpid(pid) : current;
if (!task) {
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
return -ESRCH;
}
mm = get_task_mm(task);
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
if (!mm)
return -EINVAL;