Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu
Linus Torvalds [Wed, 4 Aug 2010 22:17:52 +0000 (15:17 -0700)]
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu:
  percpu: allow limited allocation before slab is online
  percpu: make @dyn_size always mean min dyn_size in first chunk init functions
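
As a hedged illustration of the second change: @dyn_size used to be an ssize_t where -1 meant "auto"; it is now a size_t that always means a minimum, and pcpu_build_alloc_info() raises it to PERCPU_DYNAMIC_EARLY_SIZE so pre-slab allocations always fit in the first chunk. A minimal userspace model of that sizing rule (the PAGE_SIZE value and the example sizes are assumptions, not kernel code):

#include <stdio.h>
#include <stddef.h>

#define PAGE_SIZE			4096UL
#define PFN_ALIGN(x)			(((unsigned long)(x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
#define PERCPU_DYNAMIC_EARLY_SIZE	(12 << 10)

static size_t first_chunk_size(size_t static_size, size_t reserved_size,
			       size_t dyn_size)
{
	/* dyn_size is a minimum: never below the early-alloc reserve */
	if (dyn_size < PERCPU_DYNAMIC_EARLY_SIZE)
		dyn_size = PERCPU_DYNAMIC_EARLY_SIZE;
	/* page-align the whole first chunk, as pcpu_build_alloc_info() does */
	return PFN_ALIGN(static_size + reserved_size + dyn_size);
}

int main(void)
{
	/* a caller that used to pass -1 ("auto") now simply passes 0 */
	printf("%zu\n", first_chunk_size(40960, 8192, 0));
	return 0;
}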

include/linux/percpu.h
init/main.c
mm/percpu.c

diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index d3a38d6..b8b9084 100644
 #define PCPU_MIN_UNIT_SIZE             PFN_ALIGN(64 << 10)
 
 /*
+ * Percpu allocator can serve percpu allocations before slab is
+ * initialized, which allows slab to depend on the percpu allocator.
+ * The following two parameters decide how much resource to
+ * preallocate for this.  Keep PERCPU_DYNAMIC_RESERVE equal to or
+ * larger than PERCPU_DYNAMIC_EARLY_SIZE.
+ */
+#define PERCPU_DYNAMIC_EARLY_SLOTS     128
+#define PERCPU_DYNAMIC_EARLY_SIZE      (12 << 10)
+
+/*
  * PERCPU_DYNAMIC_RESERVE indicates the amount of free area to piggy
  * back on the first chunk for dynamic percpu allocation if arch is
  * manually allocating and mapping it for faster access (as a part of
@@ -104,16 +114,11 @@ extern struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups,
                                                             int nr_units);
 extern void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai);
 
-extern struct pcpu_alloc_info * __init pcpu_build_alloc_info(
-                               size_t reserved_size, ssize_t dyn_size,
-                               size_t atom_size,
-                               pcpu_fc_cpu_distance_fn_t cpu_distance_fn);
-
 extern int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
                                         void *base_addr);
 
 #ifdef CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK
-extern int __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size,
+extern int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
                                size_t atom_size,
                                pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
                                pcpu_fc_alloc_fn_t alloc_fn,
@@ -140,6 +145,7 @@ extern bool is_kernel_percpu_address(unsigned long addr);
 #ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
 extern void __init setup_per_cpu_areas(void);
 #endif
+extern void __init percpu_init_late(void);
 
 #else /* CONFIG_SMP */
 
@@ -153,6 +159,8 @@ static inline bool is_kernel_percpu_address(unsigned long addr)
 
 static inline void __init setup_per_cpu_areas(void) { }
 
+static inline void __init percpu_init_late(void) { }
+
 static inline void *pcpu_lpage_remapped(void *kaddr)
 {
        return NULL;
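
Since @dyn_size is now an explicit size_t minimum, here is a hedged sketch of how an arch caller might use the updated pcpu_embed_first_chunk() signature (pcpu_fc_alloc/pcpu_fc_free are placeholder callbacks, and a real arch would also set up __per_cpu_offset afterwards):

/* hypothetical arch setup; pcpu_fc_alloc/pcpu_fc_free are placeholders
 * matching pcpu_fc_alloc_fn_t/pcpu_fc_free_fn_t */
void __init setup_per_cpu_areas(void)
{
	int rc;

	rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
				    PERCPU_DYNAMIC_RESERVE,	/* size_t minimum, no more "-1 for auto" */
				    PAGE_SIZE,			/* atom_size */
				    NULL,			/* cpu_distance_fn: optional */
				    pcpu_fc_alloc, pcpu_fc_free);
	if (rc < 0)
		panic("failed to initialize percpu areas (err=%d)", rc);
}
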
diff --git a/init/main.c b/init/main.c
index a42fdf4..4ddb53f 100644
@@ -532,6 +532,7 @@ static void __init mm_init(void)
        page_cgroup_init_flatmem();
        mem_init();
        kmem_cache_init();
+       percpu_init_late();
        pgtable_cache_init();
        vmalloc_init();
 }
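
The ordering matters both ways: kmem_cache_init() can now itself use the percpu allocator (served from the preallocated early region), and percpu_init_late() needs slab, so it runs immediately after. A hedged sketch of the kind of early caller this enables (the struct and the call site are illustrative, not taken from the kernel):

/* illustrative pre-slab user: if this ran from within kmem_cache_init(),
 * the request would be carved out of the PERCPU_DYNAMIC_EARLY_SIZE region
 * of the first chunk rather than a slab-backed chunk */
struct cache_stats {
	unsigned long allocs;
	unsigned long frees;
};

static struct cache_stats __percpu *stats;

static void __init early_percpu_user(void)
{
	stats = alloc_percpu(struct cache_stats);
	/* failure here means EARLY_SIZE/EARLY_SLOTS were sized too small */
	BUG_ON(!stats);
}
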
diff --git a/mm/percpu.c b/mm/percpu.c
index 6470e77..e61dc2c 100644
@@ -282,6 +282,9 @@ static void __maybe_unused pcpu_next_pop(struct pcpu_chunk *chunk,
  */
 static void *pcpu_mem_alloc(size_t size)
 {
+       if (WARN_ON_ONCE(!slab_is_available()))
+               return NULL;
+
        if (size <= PAGE_SIZE)
                return kzalloc(size, GFP_KERNEL);
        else {
@@ -392,13 +395,6 @@ static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc)
        old_size = chunk->map_alloc * sizeof(chunk->map[0]);
        memcpy(new, chunk->map, old_size);
 
-       /*
-        * map_alloc < PCPU_DFL_MAP_ALLOC indicates that the chunk is
-        * one of the first chunks and still using static map.
-        */
-       if (chunk->map_alloc >= PCPU_DFL_MAP_ALLOC)
-               old = chunk->map;
-
        chunk->map_alloc = new_alloc;
        chunk->map = new;
        new = NULL;
@@ -604,7 +600,7 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
 {
        struct pcpu_chunk *chunk;
 
-       chunk = kzalloc(pcpu_chunk_struct_size, GFP_KERNEL);
+       chunk = pcpu_mem_alloc(pcpu_chunk_struct_size);
        if (!chunk)
                return NULL;
 
@@ -1013,20 +1009,6 @@ phys_addr_t per_cpu_ptr_to_phys(void *addr)
                return page_to_phys(pcpu_addr_to_page(addr));
 }
 
-static inline size_t pcpu_calc_fc_sizes(size_t static_size,
-                                       size_t reserved_size,
-                                       ssize_t *dyn_sizep)
-{
-       size_t size_sum;
-
-       size_sum = PFN_ALIGN(static_size + reserved_size +
-                            (*dyn_sizep >= 0 ? *dyn_sizep : 0));
-       if (*dyn_sizep != 0)
-               *dyn_sizep = size_sum - static_size - reserved_size;
-
-       return size_sum;
-}
-
 /**
  * pcpu_alloc_alloc_info - allocate percpu allocation info
  * @nr_groups: the number of groups
@@ -1085,7 +1067,7 @@ void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai)
 /**
  * pcpu_build_alloc_info - build alloc_info considering distances between CPUs
  * @reserved_size: the size of reserved percpu area in bytes
- * @dyn_size: free size for dynamic allocation in bytes, -1 for auto
+ * @dyn_size: minimum free size for dynamic allocation in bytes
  * @atom_size: allocation atom size
  * @cpu_distance_fn: callback to determine distance between cpus, optional
  *
@@ -1103,8 +1085,8 @@ void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai)
  * On success, pointer to the new allocation_info is returned.  On
  * failure, ERR_PTR value is returned.
  */
-struct pcpu_alloc_info * __init pcpu_build_alloc_info(
-                               size_t reserved_size, ssize_t dyn_size,
+static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
+                               size_t reserved_size, size_t dyn_size,
                                size_t atom_size,
                                pcpu_fc_cpu_distance_fn_t cpu_distance_fn)
 {
@@ -1123,13 +1105,17 @@ struct pcpu_alloc_info * __init pcpu_build_alloc_info(
        memset(group_map, 0, sizeof(group_map));
        memset(group_cnt, 0, sizeof(group_cnt));
 
+       /* calculate size_sum and ensure dyn_size is enough for early alloc */
+       size_sum = PFN_ALIGN(static_size + reserved_size +
+                           max_t(size_t, dyn_size, PERCPU_DYNAMIC_EARLY_SIZE));
+       dyn_size = size_sum - static_size - reserved_size;
+
        /*
         * Determine min_unit_size, alloc_size and max_upa such that
         * alloc_size is multiple of atom_size and is the smallest
 * which can accommodate 4k aligned segments which are equal to
         * or larger than min_unit_size.
         */
-       size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size);
        min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE);
 
        alloc_size = roundup(min_unit_size, atom_size);
@@ -1350,7 +1336,8 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
                                  void *base_addr)
 {
        static char cpus_buf[4096] __initdata;
-       static int smap[2], dmap[2];
+       static int smap[PERCPU_DYNAMIC_EARLY_SLOTS] __initdata;
+       static int dmap[PERCPU_DYNAMIC_EARLY_SLOTS] __initdata;
        size_t dyn_size = ai->dyn_size;
        size_t size_sum = ai->static_size + ai->reserved_size + dyn_size;
        struct pcpu_chunk *schunk, *dchunk = NULL;
@@ -1373,14 +1360,13 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 } while (0)
 
        /* sanity checks */
-       BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC ||
-                    ARRAY_SIZE(dmap) >= PCPU_DFL_MAP_ALLOC);
        PCPU_SETUP_BUG_ON(ai->nr_groups <= 0);
        PCPU_SETUP_BUG_ON(!ai->static_size);
        PCPU_SETUP_BUG_ON(!base_addr);
        PCPU_SETUP_BUG_ON(ai->unit_size < size_sum);
        PCPU_SETUP_BUG_ON(ai->unit_size & ~PAGE_MASK);
        PCPU_SETUP_BUG_ON(ai->unit_size < PCPU_MIN_UNIT_SIZE);
+       PCPU_SETUP_BUG_ON(ai->dyn_size < PERCPU_DYNAMIC_EARLY_SIZE);
        PCPU_SETUP_BUG_ON(pcpu_verify_alloc_info(ai) < 0);
 
        /* process group information and build config tables accordingly */
@@ -1532,7 +1518,7 @@ early_param("percpu_alloc", percpu_alloc_setup);
 /**
  * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem
  * @reserved_size: the size of reserved percpu area in bytes
- * @dyn_size: free size for dynamic allocation in bytes, -1 for auto
+ * @dyn_size: minimum free size for dynamic allocation in bytes
  * @atom_size: allocation atom size
  * @cpu_distance_fn: callback to determine distance between cpus, optional
  * @alloc_fn: function to allocate percpu page
@@ -1553,10 +1539,7 @@ early_param("percpu_alloc", percpu_alloc_setup);
  * vmalloc space is not orders of magnitude larger than distances
  * between node memory addresses (ie. 32bit NUMA machines).
  *
- * When @dyn_size is positive, dynamic area might be larger than
- * specified to fill page alignment.  When @dyn_size is auto,
- * @dyn_size is just big enough to fill page alignment after static
- * and reserved areas.
+ * @dyn_size specifies the minimum dynamic area size.
  *
  * If the needed size is smaller than the minimum or specified unit
  * size, the leftover is returned using @free_fn.
@@ -1564,7 +1547,7 @@ early_param("percpu_alloc", percpu_alloc_setup);
  * RETURNS:
  * 0 on success, -errno on failure.
  */
-int __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size,
+int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
                                  size_t atom_size,
                                  pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
                                  pcpu_fc_alloc_fn_t alloc_fn,
@@ -1695,7 +1678,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
 
        snprintf(psize_str, sizeof(psize_str), "%luK", PAGE_SIZE >> 10);
 
-       ai = pcpu_build_alloc_info(reserved_size, -1, PAGE_SIZE, NULL);
+       ai = pcpu_build_alloc_info(reserved_size, 0, PAGE_SIZE, NULL);
        if (IS_ERR(ai))
                return PTR_ERR(ai);
        BUG_ON(ai->nr_groups != 1);
@@ -1821,3 +1804,33 @@ void __init setup_per_cpu_areas(void)
                __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
 }
 #endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */
+
+/*
+ * First and reserved chunks are initialized with temporary allocation
+ * map in initdata so that they can be used before slab is online.
+ * This function is called after slab is brought up and replaces those
+ * with properly allocated maps.
+ */
+void __init percpu_init_late(void)
+{
+       struct pcpu_chunk *target_chunks[] =
+               { pcpu_first_chunk, pcpu_reserved_chunk, NULL };
+       struct pcpu_chunk *chunk;
+       unsigned long flags;
+       int i;
+
+       for (i = 0; (chunk = target_chunks[i]); i++) {
+               int *map;
+               const size_t size = PERCPU_DYNAMIC_EARLY_SLOTS * sizeof(map[0]);
+
+               BUILD_BUG_ON(size > PAGE_SIZE);
+
+               map = pcpu_mem_alloc(size);
+               BUG_ON(!map);
+
+               spin_lock_irqsave(&pcpu_lock, flags);
+               memcpy(map, chunk->map, size);
+               chunk->map = map;
+               spin_unlock_irqrestore(&pcpu_lock, flags);
+       }
+}
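
For reference, PERCPU_DYNAMIC_EARLY_SLOTS * sizeof(int) is 512 bytes on typical configs, so each pcpu_mem_alloc() here always takes the kzalloc path (the BUILD_BUG_ON guards this). A minimal userspace model of the map swap, with a pthread mutex standing in for pcpu_lock, which in the kernel is a spinlock taken with IRQs disabled:

#include <pthread.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#define EARLY_SLOTS 128

static int early_map[EARLY_SLOTS];	/* stands in for the __initdata map */
static int *chunk_map = early_map;	/* stands in for chunk->map */
static pthread_mutex_t map_lock = PTHREAD_MUTEX_INITIALIZER;	/* ~pcpu_lock */

/* once the heap is usable, move the map off the static buffer so the
 * boot-time storage can be reclaimed and the map can later be resized */
static void init_late_model(void)
{
	int *map = calloc(EARLY_SLOTS, sizeof(*map));

	assert(map);
	pthread_mutex_lock(&map_lock);
	memcpy(map, chunk_map, EARLY_SLOTS * sizeof(*map));
	chunk_map = map;
	pthread_mutex_unlock(&map_lock);
}

int main(void)
{
	early_map[0] = 42;	/* pretend an early allocation updated the map */
	init_late_model();
	assert(chunk_map != early_map && chunk_map[0] == 42);
	return 0;
}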