diff --git a/mm/highmem.c b/mm/highmem.c
index ce2e7e8..9c1e627 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
 #include <linux/highmem.h>
 #include <asm/tlbflush.h>
 
-static mempool_t *page_pool, *isa_page_pool;
-
-static void *page_pool_alloc_isa(gfp_t gfp_mask, void *data)
-{
-       return alloc_page(gfp_mask | GFP_DMA);
-}
-
-static void page_pool_free(void *page, void *data)
-{
-       __free_page(page);
-}
-
 /*
  * Virtual_count is not a pure "count".
  *  0 means that it is not mapped, and has not been mapped
@@ -50,9 +38,24 @@ static void page_pool_free(void *page, void *data)
  */
 #ifdef CONFIG_HIGHMEM
 
-static void *page_pool_alloc(gfp_t gfp_mask, void *data)
+unsigned long totalhigh_pages __read_mostly;
+EXPORT_SYMBOL(totalhigh_pages);
+
+unsigned int nr_free_highpages (void)
 {
-       return alloc_page(gfp_mask);
+       pg_data_t *pgdat;
+       unsigned int pages = 0;
+
+       for_each_online_pgdat(pgdat) {
+               pages += zone_page_state(&pgdat->node_zones[ZONE_HIGHMEM],
+                       NR_FREE_PAGES);
+               if (zone_movable_is_highmem())
+                       pages += zone_page_state(
+                                       &pgdat->node_zones[ZONE_MOVABLE],
+                                       NR_FREE_PAGES);
+       }
+
+       return pages;
 }
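
For context, a minimal usage sketch (not part of this patch): si_meminfo() is the
typical consumer of this counter, filling the freehigh/totalhigh fields roughly as
below (simplified; assumes CONFIG_HIGHMEM).

#include <linux/mm.h>
#include <linux/highmem.h>

/* Sketch: how a caller such as si_meminfo() reports highmem (simplified). */
static void report_highmem(struct sysinfo *val)
{
	val->totalhigh = totalhigh_pages;
	val->freehigh = nr_free_highpages();
}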
 
 static int pkmap_count[LAST_PKMAP];
@@ -63,9 +66,29 @@ pte_t * pkmap_page_table;
 
 static DECLARE_WAIT_QUEUE_HEAD(pkmap_map_wait);
 
+/*
+ * Most architectures have no use for kmap_high_get(), so let's abstract
+ * the disabling of IRQs out of the locking in that case to avoid
+ * potentially useless overhead.
+ */
+#ifdef ARCH_NEEDS_KMAP_HIGH_GET
+#define lock_kmap()             spin_lock_irq(&kmap_lock)
+#define unlock_kmap()           spin_unlock_irq(&kmap_lock)
+#define lock_kmap_any(flags)    spin_lock_irqsave(&kmap_lock, flags)
+#define unlock_kmap_any(flags)  spin_unlock_irqrestore(&kmap_lock, flags)
+#else
+#define lock_kmap()             spin_lock(&kmap_lock)
+#define unlock_kmap()           spin_unlock(&kmap_lock)
+#define lock_kmap_any(flags)    \
+               do { spin_lock(&kmap_lock); (void)(flags); } while (0)
+#define unlock_kmap_any(flags)  \
+               do { spin_unlock(&kmap_lock); (void)(flags); } while (0)
+#endif
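
A note on the design choice above: with ARCH_NEEDS_KMAP_HIGH_GET, kmap_lock can be
taken from interrupt context via kmap_high_get(), so every holder must disable
IRQs; without it, the plain spinlock is cheaper. A hedged sketch of the deadlock
the irq-disabling variants prevent (hypothetical interleaving):

/*
 *   CPU0, process context           CPU0, interrupt handler
 *   ---------------------           -----------------------
 *   lock_kmap();
 *     ... IRQ arrives ...
 *                                   kmap_high_get(page)
 *                                     -> lock_kmap_any(flags)
 *                                     -> spins forever on kmap_lock
 *
 * Disabling IRQs in lock_kmap() makes this interleaving impossible.
 */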
+
 static void flush_all_zero_pkmaps(void)
 {
        int i;
+       int need_flush = 0;
 
        flush_cache_kmaps();
 
@@ -83,8 +106,7 @@ static void flush_all_zero_pkmaps(void)
                pkmap_count[i] = 0;
 
                /* sanity check */
-               if (pte_none(pkmap_page_table[i]))
-                       BUG();
+               BUG_ON(pte_none(pkmap_page_table[i]));
 
                /*
                 * Don't need an atomic fetch-and-clear op here;
@@ -98,8 +120,20 @@ static void flush_all_zero_pkmaps(void)
                          &pkmap_page_table[i]);
 
                set_page_address(page, NULL);
+               need_flush = 1;
        }
-       flush_tlb_kernel_range(PKMAP_ADDR(0), PKMAP_ADDR(LAST_PKMAP));
+       if (need_flush)
+               flush_tlb_kernel_range(PKMAP_ADDR(0), PKMAP_ADDR(LAST_PKMAP));
+}
+
+/**
+ * kmap_flush_unused - flush all unused kmap mappings in order to remove stray mappings
+ */
+void kmap_flush_unused(void)
+{
+       lock_kmap();
+       flush_all_zero_pkmaps();
+       unlock_kmap();
 }
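
A hedged usage sketch (hypothetical caller, not part of this patch): code that
must guarantee no stale kernel mappings of highmem pages remain can force the
otherwise lazy pkmap teardown:

#include <linux/highmem.h>

/* Hypothetical helper: drop unused pkmap entries and their TLB mappings now. */
static void drop_stale_kmaps(void)
{
	/* entries with pkmap_count == 1 stay mapped until flushed lazily */
	kmap_flush_unused();
}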
 
 static inline unsigned long map_new_virtual(struct page *page)
@@ -129,10 +163,10 @@ start:
 
                        __set_current_state(TASK_UNINTERRUPTIBLE);
                        add_wait_queue(&pkmap_map_wait, &wait);
-                       spin_unlock(&kmap_lock);
+                       unlock_kmap();
                        schedule();
                        remove_wait_queue(&pkmap_map_wait, &wait);
-                       spin_lock(&kmap_lock);
+                       lock_kmap();
 
                        /* Somebody else might have mapped it while we slept */
                        if (page_address(page))
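
The loop above is the classic wait-queue sleep pattern: set the task state,
register on the queue, drop the lock before sleeping, then retake the lock and
re-check the condition, since another task may have satisfied it in the meantime.
The same pattern in isolation (hypothetical lock and queue names):

DECLARE_WAITQUEUE(wait, current);

__set_current_state(TASK_UNINTERRUPTIBLE);
add_wait_queue(&my_wait_queue, &wait);
spin_unlock(&my_lock);			/* never sleep holding a spinlock */
schedule();				/* woken by wake_up(&my_wait_queue) */
remove_wait_queue(&my_wait_queue, &wait);
spin_lock(&my_lock);
/* the condition may have changed while we slept -- always re-check it */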
@@ -152,39 +186,77 @@ start:
        return vaddr;
 }
 
-void fastcall *kmap_high(struct page *page)
+/**
+ * kmap_high - map a highmem page into memory
+ * @page: &struct page to map
+ *
+ * Returns the page's virtual memory address.
+ *
+ * We cannot call this from interrupts, as it may block.
+ */
+void *kmap_high(struct page *page)
 {
        unsigned long vaddr;
 
        /*
         * For highmem pages, we can't trust "virtual" until
         * after we have the lock.
-        *
-        * We cannot call this from interrupts, as it may block
         */
-       spin_lock(&kmap_lock);
+       lock_kmap();
        vaddr = (unsigned long)page_address(page);
        if (!vaddr)
                vaddr = map_new_virtual(page);
        pkmap_count[PKMAP_NR(vaddr)]++;
-       if (pkmap_count[PKMAP_NR(vaddr)] < 2)
-               BUG();
-       spin_unlock(&kmap_lock);
+       BUG_ON(pkmap_count[PKMAP_NR(vaddr)] < 2);
+       unlock_kmap();
        return (void*) vaddr;
 }
 
 EXPORT_SYMBOL(kmap_high);
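
A hedged usage sketch (hypothetical helper, not part of this patch): callers
normally reach kmap_high() through the kmap() wrapper, which returns the direct
address for lowmem pages and only takes this slow path for highmem:

#include <linux/highmem.h>
#include <linux/string.h>

/* Hypothetical: copy data into a page that may live in highmem. */
static void fill_page(struct page *page, const void *src, size_t len)
{
	void *vaddr = kmap(page);	/* kmap_high() for highmem pages */

	memcpy(vaddr, src, len);
	kunmap(page);			/* kunmap_high() for highmem pages */
}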
 
-void fastcall kunmap_high(struct page *page)
+#ifdef ARCH_NEEDS_KMAP_HIGH_GET
+/**
+ * kmap_high_get - pin a highmem page into memory
+ * @page: &struct page to pin
+ *
+ * Returns the page's current virtual memory address, or NULL if no mapping
+ * exists.  If and only if a non-NULL address is returned, a matching call
+ * to kunmap_high() is necessary.
+ *
+ * This can be called from any context.
+ */
+void *kmap_high_get(struct page *page)
+{
+       unsigned long vaddr, flags;
+
+       lock_kmap_any(flags);
+       vaddr = (unsigned long)page_address(page);
+       if (vaddr) {
+               BUG_ON(pkmap_count[PKMAP_NR(vaddr)] < 1);
+               pkmap_count[PKMAP_NR(vaddr)]++;
+       }
+       unlock_kmap_any(flags);
+       return (void*) vaddr;
+}
+#endif
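
A hedged usage sketch of the new interface (hypothetical caller): unlike
kmap_high(), kmap_high_get() never blocks and never creates a mapping, so the
NULL return must be handled; a non-NULL return pins the mapping until the
matching kunmap_high():

/* Hypothetical: use an existing kmap of @page from any context. */
static void touch_if_mapped(struct page *page)
{
	void *vaddr = kmap_high_get(page);

	if (!vaddr)
		return;		/* no mapping existed; nothing was pinned */

	/* vaddr is pinned here and safe to dereference */

	kunmap_high(page);	/* release the pin taken above */
}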
+
+/**
+ * kunmap_high - unmap a highmem page
+ * @page: &struct page to unmap
+ *
+ * If ARCH_NEEDS_KMAP_HIGH_GET is not defined, then this may be called
+ * only from user context.
+ */
+void kunmap_high(struct page *page)
 {
        unsigned long vaddr;
        unsigned long nr;
+       unsigned long flags;
        int need_wakeup;
 
-       spin_lock(&kmap_lock);
+       lock_kmap_any(flags);
        vaddr = (unsigned long)page_address(page);
-       if (!vaddr)
-               BUG();
+       BUG_ON(!vaddr);
        nr = PKMAP_NR(vaddr);
 
        /*
@@ -208,7 +280,7 @@ void fastcall kunmap_high(struct page *page)
                 */
                need_wakeup = waitqueue_active(&pkmap_map_wait);
        }
-       spin_unlock(&kmap_lock);
+       unlock_kmap_any(flags);
 
        /* do wake-up, if needed, race-free outside of the spin lock */
        if (need_wakeup)
@@ -216,281 +288,8 @@ void fastcall kunmap_high(struct page *page)
 }
 
 EXPORT_SYMBOL(kunmap_high);
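
The need_wakeup handling above is a deliberate pattern: waitqueue_active() is
sampled while still holding the lock, but wake_up() runs after the unlock,
avoiding both a wakeup under the spinlock and a lost wakeup. The same pattern in
isolation (hypothetical names):

int need_wakeup;

spin_lock(&my_lock);
/* ... change the condition that waiters sleep on ... */
need_wakeup = waitqueue_active(&my_wait_queue);	/* sample under the lock */
spin_unlock(&my_lock);

if (need_wakeup)
	wake_up(&my_wait_queue);	/* race-free outside the lock */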
-
-#define POOL_SIZE      64
-
-static __init int init_emergency_pool(void)
-{
-       struct sysinfo i;
-       si_meminfo(&i);
-       si_swapinfo(&i);
-        
-       if (!i.totalhigh)
-               return 0;
-
-       page_pool = mempool_create(POOL_SIZE, page_pool_alloc, page_pool_free, NULL);
-       if (!page_pool)
-               BUG();
-       printk("highmem bounce pool size: %d pages\n", POOL_SIZE);
-
-       return 0;
-}
-
-__initcall(init_emergency_pool);
-
-/*
- * highmem version, map in to vec
- */
-static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom)
-{
-       unsigned long flags;
-       unsigned char *vto;
-
-       local_irq_save(flags);
-       vto = kmap_atomic(to->bv_page, KM_BOUNCE_READ);
-       memcpy(vto + to->bv_offset, vfrom, to->bv_len);
-       kunmap_atomic(vto, KM_BOUNCE_READ);
-       local_irq_restore(flags);
-}
-
-#else /* CONFIG_HIGHMEM */
-
-#define bounce_copy_vec(to, vfrom)     \
-       memcpy(page_address((to)->bv_page) + (to)->bv_offset, vfrom, (to)->bv_len)
-
 #endif
 
-#define ISA_POOL_SIZE  16
-
-/*
- * gets called "every" time someone init's a queue with BLK_BOUNCE_ISA
- * as the max address, so check if the pool has already been created.
- */
-int init_emergency_isa_pool(void)
-{
-       if (isa_page_pool)
-               return 0;
-
-       isa_page_pool = mempool_create(ISA_POOL_SIZE, page_pool_alloc_isa, page_pool_free, NULL);
-       if (!isa_page_pool)
-               BUG();
-
-       printk("isa bounce pool size: %d pages\n", ISA_POOL_SIZE);
-       return 0;
-}
-
-/*
- * Simple bounce buffer support for highmem pages. Depending on the
- * queue gfp mask set, *to may or may not be a highmem page. kmap it
- * always, it will do the Right Thing
- */
-static void copy_to_high_bio_irq(struct bio *to, struct bio *from)
-{
-       unsigned char *vfrom;
-       struct bio_vec *tovec, *fromvec;
-       int i;
-
-       __bio_for_each_segment(tovec, to, i, 0) {
-               fromvec = from->bi_io_vec + i;
-
-               /*
-                * not bounced
-                */
-               if (tovec->bv_page == fromvec->bv_page)
-                       continue;
-
-               /*
-                * fromvec->bv_offset and fromvec->bv_len might have been
-                * modified by the block layer, so use the original copy,
-                * bounce_copy_vec already uses tovec->bv_len
-                */
-               vfrom = page_address(fromvec->bv_page) + tovec->bv_offset;
-
-               flush_dcache_page(tovec->bv_page);
-               bounce_copy_vec(tovec, vfrom);
-       }
-}
-
-static void bounce_end_io(struct bio *bio, mempool_t *pool, int err)
-{
-       struct bio *bio_orig = bio->bi_private;
-       struct bio_vec *bvec, *org_vec;
-       int i;
-
-       if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags))
-               set_bit(BIO_EOPNOTSUPP, &bio_orig->bi_flags);
-
-       /*
-        * free up bounce indirect pages used
-        */
-       __bio_for_each_segment(bvec, bio, i, 0) {
-               org_vec = bio_orig->bi_io_vec + i;
-               if (bvec->bv_page == org_vec->bv_page)
-                       continue;
-
-               mempool_free(bvec->bv_page, pool);      
-               dec_page_state(nr_bounce);
-       }
-
-       bio_endio(bio_orig, bio_orig->bi_size, err);
-       bio_put(bio);
-}
-
-static int bounce_end_io_write(struct bio *bio, unsigned int bytes_done,int err)
-{
-       if (bio->bi_size)
-               return 1;
-
-       bounce_end_io(bio, page_pool, err);
-       return 0;
-}
-
-static int bounce_end_io_write_isa(struct bio *bio, unsigned int bytes_done, int err)
-{
-       if (bio->bi_size)
-               return 1;
-
-       bounce_end_io(bio, isa_page_pool, err);
-       return 0;
-}
-
-static void __bounce_end_io_read(struct bio *bio, mempool_t *pool, int err)
-{
-       struct bio *bio_orig = bio->bi_private;
-
-       if (test_bit(BIO_UPTODATE, &bio->bi_flags))
-               copy_to_high_bio_irq(bio_orig, bio);
-
-       bounce_end_io(bio, pool, err);
-}
-
-static int bounce_end_io_read(struct bio *bio, unsigned int bytes_done, int err)
-{
-       if (bio->bi_size)
-               return 1;
-
-       __bounce_end_io_read(bio, page_pool, err);
-       return 0;
-}
-
-static int bounce_end_io_read_isa(struct bio *bio, unsigned int bytes_done, int err)
-{
-       if (bio->bi_size)
-               return 1;
-
-       __bounce_end_io_read(bio, isa_page_pool, err);
-       return 0;
-}
-
-static void __blk_queue_bounce(request_queue_t *q, struct bio **bio_orig,
-                       mempool_t *pool)
-{
-       struct page *page;
-       struct bio *bio = NULL;
-       int i, rw = bio_data_dir(*bio_orig);
-       struct bio_vec *to, *from;
-
-       bio_for_each_segment(from, *bio_orig, i) {
-               page = from->bv_page;
-
-               /*
-                * is destination page below bounce pfn?
-                */
-               if (page_to_pfn(page) < q->bounce_pfn)
-                       continue;
-
-               /*
-                * irk, bounce it
-                */
-               if (!bio)
-                       bio = bio_alloc(GFP_NOIO, (*bio_orig)->bi_vcnt);
-
-               to = bio->bi_io_vec + i;
-
-               to->bv_page = mempool_alloc(pool, q->bounce_gfp);
-               to->bv_len = from->bv_len;
-               to->bv_offset = from->bv_offset;
-               inc_page_state(nr_bounce);
-
-               if (rw == WRITE) {
-                       char *vto, *vfrom;
-
-                       flush_dcache_page(from->bv_page);
-                       vto = page_address(to->bv_page) + to->bv_offset;
-                       vfrom = kmap(from->bv_page) + from->bv_offset;
-                       memcpy(vto, vfrom, to->bv_len);
-                       kunmap(from->bv_page);
-               }
-       }
-
-       /*
-        * no pages bounced
-        */
-       if (!bio)
-               return;
-
-       /*
-        * at least one page was bounced, fill in possible non-highmem
-        * pages
-        */
-       __bio_for_each_segment(from, *bio_orig, i, 0) {
-               to = bio_iovec_idx(bio, i);
-               if (!to->bv_page) {
-                       to->bv_page = from->bv_page;
-                       to->bv_len = from->bv_len;
-                       to->bv_offset = from->bv_offset;
-               }
-       }
-
-       bio->bi_bdev = (*bio_orig)->bi_bdev;
-       bio->bi_flags |= (1 << BIO_BOUNCED);
-       bio->bi_sector = (*bio_orig)->bi_sector;
-       bio->bi_rw = (*bio_orig)->bi_rw;
-
-       bio->bi_vcnt = (*bio_orig)->bi_vcnt;
-       bio->bi_idx = (*bio_orig)->bi_idx;
-       bio->bi_size = (*bio_orig)->bi_size;
-
-       if (pool == page_pool) {
-               bio->bi_end_io = bounce_end_io_write;
-               if (rw == READ)
-                       bio->bi_end_io = bounce_end_io_read;
-       } else {
-               bio->bi_end_io = bounce_end_io_write_isa;
-               if (rw == READ)
-                       bio->bi_end_io = bounce_end_io_read_isa;
-       }
-
-       bio->bi_private = *bio_orig;
-       *bio_orig = bio;
-}
-
-void blk_queue_bounce(request_queue_t *q, struct bio **bio_orig)
-{
-       mempool_t *pool;
-
-       /*
-        * for non-isa bounce case, just check if the bounce pfn is equal
-        * to or bigger than the highest pfn in the system -- in that case,
-        * don't waste time iterating over bio segments
-        */
-       if (!(q->bounce_gfp & GFP_DMA)) {
-               if (q->bounce_pfn >= blk_max_pfn)
-                       return;
-               pool = page_pool;
-       } else {
-               BUG_ON(!isa_page_pool);
-               pool = isa_page_pool;
-       }
-
-       /*
-        * slow path
-        */
-       __blk_queue_bounce(q, bio_orig, pool);
-}
-
-EXPORT_SYMBOL(blk_queue_bounce);
-
 #if defined(HASHED_PAGE_VIRTUAL)
 
 #define PA_HASH_ORDER  7
@@ -523,6 +322,12 @@ static struct page_address_slot *page_slot(struct page *page)
        return &page_address_htable[hash_ptr(page, PA_HASH_ORDER)];
 }
 
+/**
+ * page_address - get the mapped virtual address of a page
+ * @page: &struct page to get the virtual address of
+ *
+ * Returns the page's virtual address.
+ */
 void *page_address(struct page *page)
 {
        unsigned long flags;
@@ -552,6 +357,11 @@ done:
 
 EXPORT_SYMBOL(page_address);
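
A hedged usage note (hypothetical helper): for lowmem pages page_address() is a
constant-time address calculation; for highmem pages it consults the hash table
above and returns NULL when no kmap currently exists, so callers must check the
result -- and the returned mapping is not pinned:

/* Hypothetical: peek at a page's mapping without creating one. */
static void *peek_mapping(struct page *page)
{
	void *vaddr = page_address(page);

	if (!vaddr)
		return NULL;	/* highmem page with no current kmap */
	return vaddr;		/* not pinned; see kmap_high_get() */
}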
 
+/**
+ * set_page_address - set a page's virtual address
+ * @page: &struct page to set
+ * @virtual: virtual address to use
+ */
 void set_page_address(struct page *page, void *virtual)
 {
        unsigned long flags;
@@ -611,3 +421,55 @@ void __init page_address_init(void)
 }
 
 #endif /* defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL) */
+
+#if defined(CONFIG_DEBUG_HIGHMEM) && defined(CONFIG_TRACE_IRQFLAGS_SUPPORT)
+
+void debug_kmap_atomic(enum km_type type)
+{
+       static int warn_count = 10;
+
+       if (unlikely(warn_count < 0))
+               return;
+
+       if (unlikely(in_interrupt())) {
+               if (in_nmi()) {
+                       if (type != KM_NMI && type != KM_NMI_PTE) {
+                               WARN_ON(1);
+                               warn_count--;
+                       }
+               } else if (in_irq()) {
+                       if (type != KM_IRQ0 && type != KM_IRQ1 &&
+                           type != KM_BIO_SRC_IRQ && type != KM_BIO_DST_IRQ &&
+                           type != KM_BOUNCE_READ && type != KM_IRQ_PTE) {
+                               WARN_ON(1);
+                               warn_count--;
+                       }
+               } else if (!irqs_disabled()) {  /* softirq */
+                       if (type != KM_IRQ0 && type != KM_IRQ1 &&
+                           type != KM_SOFTIRQ0 && type != KM_SOFTIRQ1 &&
+                           type != KM_SKB_SUNRPC_DATA &&
+                           type != KM_SKB_DATA_SOFTIRQ &&
+                           type != KM_BOUNCE_READ) {
+                               WARN_ON(1);
+                               warn_count--;
+                       }
+               }
+       }
+
+       if (type == KM_IRQ0 || type == KM_IRQ1 || type == KM_BOUNCE_READ ||
+                       type == KM_BIO_SRC_IRQ || type == KM_BIO_DST_IRQ ||
+                       type == KM_IRQ_PTE || type == KM_NMI ||
+                       type == KM_NMI_PTE ) {
+               if (!irqs_disabled()) {
+                       WARN_ON(1);
+                       warn_count--;
+               }
+       } else if (type == KM_SOFTIRQ0 || type == KM_SOFTIRQ1) {
+               if (irq_count() == 0 && !irqs_disabled()) {
+                       WARN_ON(1);
+                       warn_count--;
+               }
+       }
+}
+
+#endif
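
For reference, a hedged sketch of the slot discipline this checker enforces
(the pre-2.6.37 kmap_atomic() API, where callers name a km_type slot): IRQ
handlers use the KM_IRQ*/KM_BIO_*_IRQ slots, softirqs the KM_SOFTIRQ* slots,
and the IRQ-class slots may only be used with interrupts disabled:

#include <linux/highmem.h>
#include <linux/string.h>

/* Hypothetical softirq path: KM_SOFTIRQ0 is a valid slot here. */
static void copy_in_softirq(struct page *dst, const void *src, size_t len)
{
	void *vaddr = kmap_atomic(dst, KM_SOFTIRQ0);

	memcpy(vaddr, src, len);
	kunmap_atomic(vaddr, KM_SOFTIRQ0);
}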