#include <linux/mm.h>
#include <linux/module.h>
#include <linux/highmem.h>
+#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/interrupt.h>
#include <linux/rcupdate.h>
#include <linux/pfn.h>
#include <linux/kmemleak.h>
-#include <linux/highmem.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
#include <asm/uaccess.h>
#include <asm/tlbflush.h>
-
+#include <asm/shmparam.h>
/*** Page table manipulation functions ***/
return ret;
}
-static inline int is_vmalloc_or_module_addr(const void *x)
+int is_vmalloc_or_module_addr(const void *x)
{
/*
* ARM, x86-64 and sparc64 put modules in a special place,
};
static DEFINE_SPINLOCK(vmap_area_lock);
-static struct rb_root vmap_area_root = RB_ROOT;
static LIST_HEAD(vmap_area_list);
+static struct rb_root vmap_area_root = RB_ROOT;
+
+/* The vmap cache globals are protected by vmap_area_lock */
+static struct rb_node *free_vmap_cache;
+static unsigned long cached_hole_size;
+static unsigned long cached_vstart;
+static unsigned long cached_align;
+
static unsigned long vmap_area_pcpu_hole;
static struct vmap_area *__find_vmap_area(unsigned long addr)
struct rb_node *tmp;
while (*p) {
- struct vmap_area *tmp;
+ struct vmap_area *tmp_va;
parent = *p;
- tmp = rb_entry(parent, struct vmap_area, rb_node);
- if (va->va_start < tmp->va_end)
+ tmp_va = rb_entry(parent, struct vmap_area, rb_node);
+ if (va->va_start < tmp_va->va_end)
p = &(*p)->rb_left;
- else if (va->va_end > tmp->va_start)
+ else if (va->va_end > tmp_va->va_start)
p = &(*p)->rb_right;
else
BUG();
struct rb_node *n;
unsigned long addr;
int purged = 0;
+ struct vmap_area *first;
BUG_ON(!size);
BUG_ON(size & ~PAGE_MASK);
+ BUG_ON(!is_power_of_2(align));
va = kmalloc_node(sizeof(struct vmap_area),
gfp_mask & GFP_RECLAIM_MASK, node);
return ERR_PTR(-ENOMEM);
retry:
- addr = ALIGN(vstart, align);
-
spin_lock(&vmap_area_lock);
- if (addr + size - 1 < addr)
- goto overflow;
+ /*
+ * Invalidate cache if we have more permissive parameters.
+ * cached_hole_size notes the largest hole noticed _below_
+ * the vmap_area cached in free_vmap_cache: if size fits
+ * into that hole, we want to scan from vstart to reuse
+ * the hole instead of allocating above free_vmap_cache.
+ * Note that __free_vmap_area may update free_vmap_cache
+ * without updating cached_hole_size or cached_align.
+ */
+ if (!free_vmap_cache ||
+ size < cached_hole_size ||
+ vstart < cached_vstart ||
+ align < cached_align) {
+nocache:
+ cached_hole_size = 0;
+ free_vmap_cache = NULL;
+ }
+ /* record if we encounter less permissive parameters */
+ cached_vstart = vstart;
+ cached_align = align;
+
+ /* find starting point for our search */
+ if (free_vmap_cache) {
+ first = rb_entry(free_vmap_cache, struct vmap_area, rb_node);
+ addr = ALIGN(first->va_end, align);
+ if (addr < vstart)
+ goto nocache;
+ if (addr + size - 1 < addr)
+ goto overflow;
- /* XXX: could have a last_hole cache */
- n = vmap_area_root.rb_node;
- if (n) {
- struct vmap_area *first = NULL;
+ } else {
+ addr = ALIGN(vstart, align);
+ if (addr + size - 1 < addr)
+ goto overflow;
+
+ n = vmap_area_root.rb_node;
+ first = NULL;
- do {
+ while (n) {
struct vmap_area *tmp;
tmp = rb_entry(n, struct vmap_area, rb_node);
if (tmp->va_end >= addr) {
- if (!first && tmp->va_start < addr + size)
- first = tmp;
- n = n->rb_left;
- } else {
first = tmp;
+ if (tmp->va_start <= addr)
+ break;
+ n = n->rb_left;
+ } else
n = n->rb_right;
- }
- } while (n);
+ }
if (!first)
goto found;
-
- if (first->va_end < addr) {
- n = rb_next(&first->rb_node);
- if (n)
- first = rb_entry(n, struct vmap_area, rb_node);
- else
- goto found;
- }
-
- while (addr + size > first->va_start && addr + size <= vend) {
- addr = ALIGN(first->va_end + PAGE_SIZE, align);
- if (addr + size - 1 < addr)
- goto overflow;
-
- n = rb_next(&first->rb_node);
- if (n)
- first = rb_entry(n, struct vmap_area, rb_node);
- else
- goto found;
- }
}
-found:
- if (addr + size > vend) {
-overflow:
- spin_unlock(&vmap_area_lock);
- if (!purged) {
- purge_vmap_area_lazy();
- purged = 1;
- goto retry;
- }
- if (printk_ratelimit())
- printk(KERN_WARNING
- "vmap allocation for size %lu failed: "
- "use vmalloc=<size> to increase size.\n", size);
- kfree(va);
- return ERR_PTR(-EBUSY);
+
+ /* from the starting point, walk areas until a suitable hole is found */
+ while (addr + size > first->va_start && addr + size <= vend) {
+ if (addr + cached_hole_size < first->va_start)
+ cached_hole_size = first->va_start - addr;
+ addr = ALIGN(first->va_end, align);
+ if (addr + size - 1 < addr)
+ goto overflow;
+
+ n = rb_next(&first->rb_node);
+ if (n)
+ first = rb_entry(n, struct vmap_area, rb_node);
+ else
+ goto found;
}
- BUG_ON(addr & (align-1));
+found:
+ if (addr + size > vend)
+ goto overflow;
va->va_start = addr;
va->va_end = addr + size;
va->flags = 0;
__insert_vmap_area(va);
+ free_vmap_cache = &va->rb_node;
spin_unlock(&vmap_area_lock);
- return va;
-}
+ BUG_ON(va->va_start & (align-1));
+ BUG_ON(va->va_start < vstart);
+ BUG_ON(va->va_end > vend);
-static void rcu_free_va(struct rcu_head *head)
-{
- struct vmap_area *va = container_of(head, struct vmap_area, rcu_head);
+ return va;
+overflow:
+ spin_unlock(&vmap_area_lock);
+ if (!purged) {
+ purge_vmap_area_lazy();
+ purged = 1;
+ goto retry;
+ }
+ if (printk_ratelimit())
+ printk(KERN_WARNING
+ "vmap allocation for size %lu failed: "
+ "use vmalloc=<size> to increase size.\n", size);
kfree(va);
+ return ERR_PTR(-EBUSY);
}
static void __free_vmap_area(struct vmap_area *va)
{
BUG_ON(RB_EMPTY_NODE(&va->rb_node));
+
+ if (free_vmap_cache) {
+ if (va->va_end < cached_vstart) {
+ free_vmap_cache = NULL;
+ } else {
+ struct vmap_area *cache;
+ cache = rb_entry(free_vmap_cache, struct vmap_area, rb_node);
+ if (va->va_start <= cache->va_start) {
+ free_vmap_cache = rb_prev(&va->rb_node);
+ /*
+ * We don't try to update cached_hole_size or
+ * cached_align, but it won't go very wrong.
+ */
+ }
+ }
+ }
rb_erase(&va->rb_node, &vmap_area_root);
RB_CLEAR_NODE(&va->rb_node);
list_del_rcu(&va->list);
if (va->va_end > VMALLOC_START && va->va_end <= VMALLOC_END)
vmap_area_pcpu_hole = max(vmap_area_pcpu_hole, va->va_end);
- call_rcu(&va->rcu_head, rcu_free_va);
+ kfree_rcu(va, rcu_head);
}
/*
static atomic_t vmap_lazy_nr = ATOMIC_INIT(0);
+/* for per-CPU blocks */
+static void purge_fragmented_blocks_allcpus(void);
+
+/*
+ * called before a call to iounmap() if the caller wants vm_area_struct's
+ * immediately freed.
+ */
+void set_iounmap_nonlazy(void)
+{
+ atomic_set(&vmap_lazy_nr, lazy_max_pages()+1);
+}
+
/*
* Purges all lazily-freed vmap areas.
*
} else
spin_lock(&purge_lock);
+ if (sync)
+ purge_fragmented_blocks_allcpus();
+
rcu_read_lock();
list_for_each_entry_rcu(va, &vmap_area_list, list) {
if (va->flags & VM_LAZY_FREE) {
if (va->va_end > *end)
*end = va->va_end;
nr += (va->va_end - va->va_start) >> PAGE_SHIFT;
- unmap_vmap_area(va);
list_add_tail(&va->purge_list, &valist);
va->flags |= VM_LAZY_FREEING;
va->flags &= ~VM_LAZY_FREE;
}
rcu_read_unlock();
- if (nr) {
- BUG_ON(nr > atomic_read(&vmap_lazy_nr));
+ if (nr)
atomic_sub(nr, &vmap_lazy_nr);
- }
if (nr || force_flush)
flush_tlb_kernel_range(*start, *end);
}
/*
- * Free and unmap a vmap area, caller ensuring flush_cache_vunmap had been
- * called for the correct range previously.
+ * Free a vmap area, caller ensuring that the area has been unmapped
+ * and flush_cache_vunmap had been called for the correct range
+ * previously.
*/
-static void free_unmap_vmap_area_noflush(struct vmap_area *va)
+static void free_vmap_area_noflush(struct vmap_area *va)
{
va->flags |= VM_LAZY_FREE;
atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr);
try_purge_vmap_area_lazy();
}
+/*
+ * Free and unmap a vmap area, caller ensuring flush_cache_vunmap had been
+ * called for the correct range previously.
+ */
+static void free_unmap_vmap_area_noflush(struct vmap_area *va)
+{
+ unmap_vmap_area(va);
+ free_vmap_area_noflush(va);
+}
+
/*
* Free and unmap a vmap area
*/
#define VMAP_BBMAP_BITS_MIN (VMAP_MAX_ALLOC*2)
#define VMAP_MIN(x, y) ((x) < (y) ? (x) : (y)) /* can't use min() */
#define VMAP_MAX(x, y) ((x) > (y) ? (x) : (y)) /* can't use max() */
-#define VMAP_BBMAP_BITS VMAP_MIN(VMAP_BBMAP_BITS_MAX, \
- VMAP_MAX(VMAP_BBMAP_BITS_MIN, \
- VMALLOC_PAGES / NR_CPUS / 16))
+#define VMAP_BBMAP_BITS \
+ VMAP_MIN(VMAP_BBMAP_BITS_MAX, \
+ VMAP_MAX(VMAP_BBMAP_BITS_MIN, \
+ VMALLOC_PAGES / roundup_pow_of_two(NR_CPUS) / 16))
#define VMAP_BLOCK_SIZE (VMAP_BBMAP_BITS * PAGE_SIZE)
struct vmap_block_queue {
spinlock_t lock;
struct list_head free;
- struct list_head dirty;
- unsigned int nr_dirty;
};
struct vmap_block {
unsigned long free, dirty;
DECLARE_BITMAP(alloc_map, VMAP_BBMAP_BITS);
DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS);
- union {
- struct list_head free_list;
- struct rcu_head rcu_head;
- };
+ struct list_head free_list;
+ struct rcu_head rcu_head;
+ struct list_head purge;
};
/* Queue of free and dirty vmap blocks, for allocation and flushing purposes */
va = alloc_vmap_area(VMAP_BLOCK_SIZE, VMAP_BLOCK_SIZE,
VMALLOC_START, VMALLOC_END,
node, gfp_mask);
- if (unlikely(IS_ERR(va))) {
+ if (IS_ERR(va)) {
kfree(vb);
- return ERR_PTR(PTR_ERR(va));
+ return ERR_CAST(va);
}
err = radix_tree_preload(gfp_mask);
vbq = &get_cpu_var(vmap_block_queue);
vb->vbq = vbq;
spin_lock(&vbq->lock);
- list_add(&vb->free_list, &vbq->free);
+ list_add_rcu(&vb->free_list, &vbq->free);
spin_unlock(&vbq->lock);
- put_cpu_var(vmap_cpu_blocks);
+ put_cpu_var(vmap_block_queue);
return vb;
}
-static void rcu_free_vb(struct rcu_head *head)
-{
- struct vmap_block *vb = container_of(head, struct vmap_block, rcu_head);
-
- kfree(vb);
-}
-
static void free_vmap_block(struct vmap_block *vb)
{
struct vmap_block *tmp;
unsigned long vb_idx;
- BUG_ON(!list_empty(&vb->free_list));
-
vb_idx = addr_to_vb_idx(vb->va->va_start);
spin_lock(&vmap_block_tree_lock);
tmp = radix_tree_delete(&vmap_block_tree, vb_idx);
spin_unlock(&vmap_block_tree_lock);
BUG_ON(tmp != vb);
- free_unmap_vmap_area_noflush(vb->va);
- call_rcu(&vb->rcu_head, rcu_free_vb);
+ free_vmap_area_noflush(vb->va);
+ kfree_rcu(vb, rcu_head);
+}
+
+static void purge_fragmented_blocks(int cpu)
+{
+ LIST_HEAD(purge);
+ struct vmap_block *vb;
+ struct vmap_block *n_vb;
+ struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(vb, &vbq->free, free_list) {
+
+ if (!(vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS))
+ continue;
+
+ spin_lock(&vb->lock);
+ if (vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS) {
+ vb->free = 0; /* prevent further allocs after releasing lock */
+ vb->dirty = VMAP_BBMAP_BITS; /* prevent purging it again */
+ bitmap_fill(vb->alloc_map, VMAP_BBMAP_BITS);
+ bitmap_fill(vb->dirty_map, VMAP_BBMAP_BITS);
+ spin_lock(&vbq->lock);
+ list_del_rcu(&vb->free_list);
+ spin_unlock(&vbq->lock);
+ spin_unlock(&vb->lock);
+ list_add_tail(&vb->purge, &purge);
+ } else
+ spin_unlock(&vb->lock);
+ }
+ rcu_read_unlock();
+
+ list_for_each_entry_safe(vb, n_vb, &purge, purge) {
+ list_del(&vb->purge);
+ free_vmap_block(vb);
+ }
+}
+
+static void purge_fragmented_blocks_thiscpu(void)
+{
+ purge_fragmented_blocks(smp_processor_id());
+}
+
+static void purge_fragmented_blocks_allcpus(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ purge_fragmented_blocks(cpu);
}
static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
struct vmap_block *vb;
unsigned long addr = 0;
unsigned int order;
+ int purge = 0;
BUG_ON(size & ~PAGE_MASK);
BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
int i;
spin_lock(&vb->lock);
+ if (vb->free < 1UL << order)
+ goto next;
+
i = bitmap_find_free_region(vb->alloc_map,
VMAP_BBMAP_BITS, order);
- if (i >= 0) {
- addr = vb->va->va_start + (i << PAGE_SHIFT);
- BUG_ON(addr_to_vb_idx(addr) !=
- addr_to_vb_idx(vb->va->va_start));
- vb->free -= 1UL << order;
- if (vb->free == 0) {
- spin_lock(&vbq->lock);
- list_del_init(&vb->free_list);
- spin_unlock(&vbq->lock);
+ if (i < 0) {
+ if (vb->free + vb->dirty == VMAP_BBMAP_BITS) {
+ /* fragmented and no outstanding allocations */
+ BUG_ON(vb->dirty != VMAP_BBMAP_BITS);
+ purge = 1;
}
- spin_unlock(&vb->lock);
- break;
+ goto next;
+ }
+ addr = vb->va->va_start + (i << PAGE_SHIFT);
+ BUG_ON(addr_to_vb_idx(addr) !=
+ addr_to_vb_idx(vb->va->va_start));
+ vb->free -= 1UL << order;
+ if (vb->free == 0) {
+ spin_lock(&vbq->lock);
+ list_del_rcu(&vb->free_list);
+ spin_unlock(&vbq->lock);
}
spin_unlock(&vb->lock);
+ break;
+next:
+ spin_unlock(&vb->lock);
}
- put_cpu_var(vmap_cpu_blocks);
+
+ if (purge)
+ purge_fragmented_blocks_thiscpu();
+
+ put_cpu_var(vmap_block_queue);
rcu_read_unlock();
if (!addr) {
rcu_read_unlock();
BUG_ON(!vb);
+ vunmap_page_range((unsigned long)addr, (unsigned long)addr + size);
+
spin_lock(&vb->lock);
- bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order);
+ BUG_ON(bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order));
vb->dirty += 1UL << order;
if (vb->dirty == VMAP_BBMAP_BITS) {
- BUG_ON(vb->free || !list_empty(&vb->free_list));
+ BUG_ON(vb->free);
spin_unlock(&vb->lock);
free_vmap_block(vb);
} else
s = vb->va->va_start + (i << PAGE_SHIFT);
e = vb->va->va_start + (j << PAGE_SHIFT);
- vunmap_page_range(s, e);
flush = 1;
if (s < start)
vbq = &per_cpu(vmap_block_queue, i);
spin_lock_init(&vbq->lock);
INIT_LIST_HEAD(&vbq->free);
- INIT_LIST_HEAD(&vbq->dirty);
- vbq->nr_dirty = 0;
}
/* Import existing vmlist entries. */
{
vunmap_page_range(addr, addr + size);
}
+EXPORT_SYMBOL_GPL(unmap_kernel_range_noflush);
/**
* unmap_kernel_range - unmap kernel VM area and flush cache and TLB
}
static struct vm_struct *__get_vm_area_node(unsigned long size,
- unsigned long flags, unsigned long start, unsigned long end,
- int node, gfp_t gfp_mask, void *caller)
+ unsigned long align, unsigned long flags, unsigned long start,
+ unsigned long end, int node, gfp_t gfp_mask, void *caller)
{
static struct vmap_area *va;
struct vm_struct *area;
- unsigned long align = 1;
BUG_ON(in_interrupt());
if (flags & VM_IOREMAP) {
struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
unsigned long start, unsigned long end)
{
- return __get_vm_area_node(size, flags, start, end, -1, GFP_KERNEL,
+ return __get_vm_area_node(size, 1, flags, start, end, -1, GFP_KERNEL,
__builtin_return_address(0));
}
EXPORT_SYMBOL_GPL(__get_vm_area);
unsigned long start, unsigned long end,
void *caller)
{
- return __get_vm_area_node(size, flags, start, end, -1, GFP_KERNEL,
+ return __get_vm_area_node(size, 1, flags, start, end, -1, GFP_KERNEL,
caller);
}
*/
struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
{
- return __get_vm_area_node(size, flags, VMALLOC_START, VMALLOC_END,
+ return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
-1, GFP_KERNEL, __builtin_return_address(0));
}
struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags,
void *caller)
{
- return __get_vm_area_node(size, flags, VMALLOC_START, VMALLOC_END,
+ return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END,
-1, GFP_KERNEL, caller);
}
-struct vm_struct *get_vm_area_node(unsigned long size, unsigned long flags,
- int node, gfp_t gfp_mask)
-{
- return __get_vm_area_node(size, flags, VMALLOC_START, VMALLOC_END, node,
- gfp_mask, __builtin_return_address(0));
-}
-
static struct vm_struct *find_vm_area(const void *addr)
{
struct vmap_area *va;
}
EXPORT_SYMBOL(vmap);
-static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
+static void *__vmalloc_node(unsigned long size, unsigned long align,
+ gfp_t gfp_mask, pgprot_t prot,
int node, void *caller);
static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
pgprot_t prot, int node, void *caller)
{
+ const int order = 0;
struct page **pages;
unsigned int nr_pages, array_size, i;
+ gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
nr_pages = (area->size - PAGE_SIZE) >> PAGE_SHIFT;
array_size = (nr_pages * sizeof(struct page *));
area->nr_pages = nr_pages;
/* Please note that the recursion is strictly bounded. */
if (array_size > PAGE_SIZE) {
- pages = __vmalloc_node(array_size, gfp_mask | __GFP_ZERO,
+ pages = __vmalloc_node(array_size, 1, nested_gfp|__GFP_HIGHMEM,
PAGE_KERNEL, node, caller);
area->flags |= VM_VPAGES;
} else {
- pages = kmalloc_node(array_size,
- (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO,
- node);
+ pages = kmalloc_node(array_size, nested_gfp, node);
}
area->pages = pages;
area->caller = caller;
for (i = 0; i < area->nr_pages; i++) {
struct page *page;
+ gfp_t tmp_mask = gfp_mask | __GFP_NOWARN;
if (node < 0)
- page = alloc_page(gfp_mask);
+ page = alloc_page(tmp_mask);
else
- page = alloc_pages_node(node, gfp_mask, 0);
+ page = alloc_pages_node(node, tmp_mask, order);
if (unlikely(!page)) {
/* Successfully allocated i pages, free them in __vunmap() */
return area->addr;
fail:
+ warn_alloc_failed(gfp_mask, order, "vmalloc: allocation failure, "
+ "allocated %ld of %ld bytes\n",
+ (area->nr_pages*PAGE_SIZE), area->size);
vfree(area->addr);
return NULL;
}
-void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot)
-{
- void *addr = __vmalloc_area_node(area, gfp_mask, prot, -1,
- __builtin_return_address(0));
-
- /*
- * A ref_count = 3 is needed because the vm_struct and vmap_area
- * structures allocated in the __get_vm_area_node() function contain
- * references to the virtual address of the vmalloc'ed block.
- */
- kmemleak_alloc(addr, area->size - PAGE_SIZE, 3, gfp_mask);
-
- return addr;
-}
-
/**
- * __vmalloc_node - allocate virtually contiguous memory
+ * __vmalloc_node_range - allocate virtually contiguous memory
* @size: allocation size
+ * @align: desired alignment
+ * @start: vm area range start
+ * @end: vm area range end
* @gfp_mask: flags for the page level allocator
* @prot: protection mask for the allocated pages
* @node: node to use for allocation or -1
* allocator with @gfp_mask flags. Map them into contiguous
* kernel virtual space, using a pagetable protection of @prot.
*/
-static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
- int node, void *caller)
+void *__vmalloc_node_range(unsigned long size, unsigned long align,
+ unsigned long start, unsigned long end, gfp_t gfp_mask,
+ pgprot_t prot, int node, void *caller)
{
struct vm_struct *area;
void *addr;
if (!size || (size >> PAGE_SHIFT) > totalram_pages)
return NULL;
- area = __get_vm_area_node(size, VM_ALLOC, VMALLOC_START, VMALLOC_END,
- node, gfp_mask, caller);
+ area = __get_vm_area_node(size, align, VM_ALLOC, start, end, node,
+ gfp_mask, caller);
if (!area)
return NULL;
return addr;
}
+/**
+ * __vmalloc_node - allocate virtually contiguous memory
+ * @size: allocation size
+ * @align: desired alignment
+ * @gfp_mask: flags for the page level allocator
+ * @prot: protection mask for the allocated pages
+ * @node: node to use for allocation or -1
+ * @caller: caller's return address
+ *
+ * Allocate enough pages to cover @size from the page level
+ * allocator with @gfp_mask flags. Map them into contiguous
+ * kernel virtual space, using a pagetable protection of @prot.
+ */
+static void *__vmalloc_node(unsigned long size, unsigned long align,
+ gfp_t gfp_mask, pgprot_t prot,
+ int node, void *caller)
+{
+ return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
+ gfp_mask, prot, node, caller);
+}
+
void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
{
- return __vmalloc_node(size, gfp_mask, prot, -1,
+ return __vmalloc_node(size, 1, gfp_mask, prot, -1,
__builtin_return_address(0));
}
EXPORT_SYMBOL(__vmalloc);
+static inline void *__vmalloc_node_flags(unsigned long size,
+ int node, gfp_t flags)
+{
+ return __vmalloc_node(size, 1, flags, PAGE_KERNEL,
+ node, __builtin_return_address(0));
+}
+
/**
* vmalloc - allocate virtually contiguous memory
* @size: allocation size
*/
void *vmalloc(unsigned long size)
{
- return __vmalloc_node(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL,
- -1, __builtin_return_address(0));
+ return __vmalloc_node_flags(size, -1, GFP_KERNEL | __GFP_HIGHMEM);
}
EXPORT_SYMBOL(vmalloc);
+/**
+ * vzalloc - allocate virtually contiguous memory with zero fill
+ * @size: allocation size
+ * Allocate enough pages to cover @size from the page level
+ * allocator and map them into contiguous kernel virtual space.
+ * The memory allocated is set to zero.
+ *
+ * For tight control over page level allocator and protection flags
+ * use __vmalloc() instead.
+ */
+void *vzalloc(unsigned long size)
+{
+ return __vmalloc_node_flags(size, -1,
+ GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
+}
+EXPORT_SYMBOL(vzalloc);
+
/**
* vmalloc_user - allocate zeroed virtually contiguous memory for userspace
* @size: allocation size
struct vm_struct *area;
void *ret;
- ret = __vmalloc_node(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
+ ret = __vmalloc_node(size, SHMLBA,
+ GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
PAGE_KERNEL, -1, __builtin_return_address(0));
if (ret) {
area = find_vm_area(ret);
*/
void *vmalloc_node(unsigned long size, int node)
{
- return __vmalloc_node(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL,
+ return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL,
node, __builtin_return_address(0));
}
EXPORT_SYMBOL(vmalloc_node);
+/**
+ * vzalloc_node - allocate memory on a specific node with zero fill
+ * @size: allocation size
+ * @node: numa node
+ *
+ * Allocate enough pages to cover @size from the page level
+ * allocator and map them into contiguous kernel virtual space.
+ * The memory allocated is set to zero.
+ *
+ * For tight control over page level allocator and protection flags
+ * use __vmalloc_node() instead.
+ */
+void *vzalloc_node(unsigned long size, int node)
+{
+ return __vmalloc_node_flags(size, node,
+ GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
+}
+EXPORT_SYMBOL(vzalloc_node);
+
#ifndef PAGE_KERNEL_EXEC
# define PAGE_KERNEL_EXEC PAGE_KERNEL
#endif
void *vmalloc_exec(unsigned long size)
{
- return __vmalloc_node(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC,
+ return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC,
-1, __builtin_return_address(0));
}
*/
void *vmalloc_32(unsigned long size)
{
- return __vmalloc_node(size, GFP_VMALLOC32, PAGE_KERNEL,
+ return __vmalloc_node(size, 1, GFP_VMALLOC32, PAGE_KERNEL,
-1, __builtin_return_address(0));
}
EXPORT_SYMBOL(vmalloc_32);
struct vm_struct *area;
void *ret;
- ret = __vmalloc_node(size, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL,
+ ret = __vmalloc_node(size, 1, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL,
-1, __builtin_return_address(0));
if (ret) {
area = find_vm_area(ret);
* should know vmalloc() area is valid and can use memcpy().
* This is for routines which have to access vmalloc area without
* any informaion, as /dev/kmem.
- *
- * The caller should guarantee KM_USER1 is not used.
*/
long vwrite(char *buf, char *addr, unsigned long count)
return NULL;
}
- /* Make sure the pagetables are constructed in process kernel
- mappings */
- vmalloc_sync_all();
-
return area;
}
EXPORT_SYMBOL_GPL(alloc_vm_area);
}
EXPORT_SYMBOL_GPL(free_vm_area);
+#ifdef CONFIG_SMP
static struct vmap_area *node_to_va(struct rb_node *n)
{
return n ? rb_entry(n, struct vmap_area, rb_node) : NULL;
* @sizes: array containing size of each area
* @nr_vms: the number of areas to allocate
* @align: alignment, all entries in @offsets and @sizes must be aligned to this
- * @gfp_mask: allocation mask
*
* Returns: kmalloc'd vm_struct pointer array pointing to allocated
* vm_structs on success, %NULL on failure
*
* Percpu allocator wants to use congruent vm areas so that it can
* maintain the offsets among percpu areas. This function allocates
- * congruent vmalloc areas for it. These areas tend to be scattered
- * pretty far, distance between two areas easily going up to
- * gigabytes. To avoid interacting with regular vmallocs, these areas
- * are allocated from top.
+ * congruent vmalloc areas for it with GFP_KERNEL. These areas tend to
+ * be scattered pretty far, distance between two areas easily going up
+ * to gigabytes. To avoid interacting with regular vmallocs, these
+ * areas are allocated from top.
*
* Despite its complicated look, this allocator is rather simple. It
* does everything top-down and scans areas from the end looking for
*/
struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
const size_t *sizes, int nr_vms,
- size_t align, gfp_t gfp_mask)
+ size_t align)
{
const unsigned long vmalloc_start = ALIGN(VMALLOC_START, align);
const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1);
unsigned long base, start, end, last_end;
bool purged = false;
- gfp_mask &= GFP_RECLAIM_MASK;
-
/* verify parameters and allocate data structures */
BUG_ON(align & ~PAGE_MASK || !is_power_of_2(align));
for (last_area = 0, area = 0; area < nr_vms; area++) {
return NULL;
}
- vms = kzalloc(sizeof(vms[0]) * nr_vms, gfp_mask);
- vas = kzalloc(sizeof(vas[0]) * nr_vms, gfp_mask);
+ vms = kzalloc(sizeof(vms[0]) * nr_vms, GFP_KERNEL);
+ vas = kzalloc(sizeof(vas[0]) * nr_vms, GFP_KERNEL);
if (!vas || !vms)
goto err_free;
for (area = 0; area < nr_vms; area++) {
- vas[area] = kzalloc(sizeof(struct vmap_area), gfp_mask);
- vms[area] = kzalloc(sizeof(struct vm_struct), gfp_mask);
+ vas[area] = kzalloc(sizeof(struct vmap_area), GFP_KERNEL);
+ vms[area] = kzalloc(sizeof(struct vm_struct), GFP_KERNEL);
if (!vas[area] || !vms[area])
goto err_free;
}
free_vm_area(vms[i]);
kfree(vms);
}
+#endif /* CONFIG_SMP */
#ifdef CONFIG_PROC_FS
static void *s_start(struct seq_file *m, loff_t *pos)
+ __acquires(&vmlist_lock)
{
loff_t n = *pos;
struct vm_struct *v;
}
static void s_stop(struct seq_file *m, void *p)
+ __releases(&vmlist_lock)
{
read_unlock(&vmlist_lock);
}
seq_printf(m, "0x%p-0x%p %7ld",
v->addr, v->addr + v->size, v->size);
- if (v->caller) {
- char buff[KSYM_SYMBOL_LEN];
-
- seq_putc(m, ' ');
- sprint_symbol(buff, (unsigned long)v->caller);
- seq_puts(m, buff);
- }
+ if (v->caller)
+ seq_printf(m, " %pS", v->caller);
if (v->nr_pages)
seq_printf(m, " pages=%d", v->nr_pages);
if (v->phys_addr)
- seq_printf(m, " phys=%lx", v->phys_addr);
+ seq_printf(m, " phys=%llx", (unsigned long long)v->phys_addr);
if (v->flags & VM_IOREMAP)
seq_printf(m, " ioremap");
unsigned int *ptr = NULL;
int ret;
- if (NUMA_BUILD)
+ if (NUMA_BUILD) {
ptr = kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL);
+ if (ptr == NULL)
+ return -ENOMEM;
+ }
ret = seq_open(file, &vmalloc_op);
if (!ret) {
struct seq_file *m = file->private_data;