[SPARC64]: Move over to sparsemem.
David S. Miller [Wed, 8 Mar 2006 10:16:07 +0000 (02:16 -0800)]
This has been pending for a long time, and the fact
that we waste a ton of ram on some configurations
kind of pushed things over the edge.

Signed-off-by: David S. Miller <davem@davemloft.net>

arch/sparc64/Kconfig
arch/sparc64/kernel/sparc64_ksyms.c
arch/sparc64/mm/init.c
include/asm-sparc64/numnodes.h [new file with mode: 0644]
include/asm-sparc64/page.h
include/asm-sparc64/pgtable.h
include/asm-sparc64/sparsemem.h [new file with mode: 0644]

index 4c0a50a..a253a39 100644 (file)
@@ -186,6 +186,12 @@ endchoice
 
 endmenu
 
+config ARCH_SPARSEMEM_ENABLE
+       def_bool y
+
+config ARCH_SPARSEMEM_DEFAULT
+       def_bool y
+
 source "mm/Kconfig"
 
 config GENERIC_ISA_DMA
index e87fe7d..9914a17 100644 (file)
@@ -95,9 +95,6 @@ extern int __ashrdi3(int, int);
 
 extern int dump_fpu (struct pt_regs * regs, elf_fpregset_t * fpregs);
 
-extern unsigned long phys_base;
-extern unsigned long pfn_base;
-
 extern unsigned int sys_call_table[];
 
 extern void xor_vis_2(unsigned long, unsigned long *, unsigned long *);
@@ -346,11 +343,7 @@ EXPORT_SYMBOL(__strncpy_from_user);
 EXPORT_SYMBOL(__clear_user);
 
 /* Various address conversion macros use this. */
-EXPORT_SYMBOL(phys_base);
-EXPORT_SYMBOL(pfn_base);
 EXPORT_SYMBOL(sparc64_valid_addr_bitmap);
-EXPORT_SYMBOL(page_to_pfn);
-EXPORT_SYMBOL(pfn_to_page);
 
 /* No version information on this, heavily used in inline asm,
  * and will always be 'void __ret_efault(void)'.
index a639393..5f67b53 100644 (file)
@@ -130,11 +130,9 @@ static void __init read_obp_memory(const char *property,
 
 unsigned long *sparc64_valid_addr_bitmap __read_mostly;
 
-/* Ugly, but necessary... -DaveM */
-unsigned long phys_base __read_mostly;
+/* Kernel physical address base and size in bytes.  */
 unsigned long kern_base __read_mostly;
 unsigned long kern_size __read_mostly;
-unsigned long pfn_base __read_mostly;
 
 /* get_new_mmu_context() uses "cache + 1".  */
 DEFINE_SPINLOCK(ctx_alloc_lock);
@@ -368,16 +366,6 @@ void __kprobes flush_icache_range(unsigned long start, unsigned long end)
        }
 }
 
-unsigned long page_to_pfn(struct page *page)
-{
-       return (unsigned long) ((page - mem_map) + pfn_base);
-}
-
-struct page *pfn_to_page(unsigned long pfn)
-{
-       return (mem_map + (pfn - pfn_base));
-}
-
 void show_mem(void)
 {
        printk("Mem-info:\n");
@@ -773,9 +761,78 @@ void sparc_ultra_dump_dtlb(void)
 
 extern unsigned long cmdline_memory_size;
 
-unsigned long __init bootmem_init(unsigned long *pages_avail)
+/* Return the pfn of the first area in pavail[] that can hold the bootmem
+ * map without touching the kernel image or initrd; else call prom_halt().
+ */
+static unsigned long __init choose_bootmap_pfn(unsigned long start_pfn,
+                                              unsigned long end_pfn)
 {
-       unsigned long bootmap_size, start_pfn, end_pfn;
+       unsigned long avoid_start, avoid_end, bootmap_size;
+       int i;
+
+       bootmap_size = ((end_pfn - start_pfn) + 7) / 8;  /* 1 bit per pfn, in bytes */
+       bootmap_size = ALIGN(bootmap_size, sizeof(long));  /* round up to whole longs */
+
+       avoid_start = avoid_end = 0;  /* empty unless initrd support is built in */
+#ifdef CONFIG_BLK_DEV_INITRD
+       avoid_start = initrd_start;
+       avoid_end = PAGE_ALIGN(initrd_end);
+#endif
+
+#ifdef CONFIG_DEBUG_BOOTMEM
+       prom_printf("choose_bootmap_pfn: kern[%lx:%lx] avoid[%lx:%lx]\n",
+                   kern_base, PAGE_ALIGN(kern_base + kern_size),
+                   avoid_start, avoid_end);
+#endif
+       for (i = 0; i < pavail_ents; i++) {
+               unsigned long start, end;
+
+               start = pavail[i].phys_addr;
+               end = start + pavail[i].reg_size;
+
+               while (start < end) {  /* every skip below restarts the checks */
+                       if (start >= kern_base &&
+                           start < PAGE_ALIGN(kern_base + kern_size)) {
+                               start = PAGE_ALIGN(kern_base + kern_size);
+                               continue;  /* start was inside the kernel */
+                       }
+                       if (start >= avoid_start && start < avoid_end) {
+                               start = avoid_end;
+                               continue;  /* start was inside the initrd */
+                       }
+
+                       if ((end - start) < bootmap_size)
+                               break;  /* rest of this entry is too small */
+
+                       if (start < kern_base &&
+                           (start + bootmap_size) > kern_base) {
+                               start = PAGE_ALIGN(kern_base + kern_size);
+                               continue;  /* map would run into the kernel */
+                       }
+
+                       if (start < avoid_start &&
+                           (start + bootmap_size) > avoid_start) {
+                               start = avoid_end;
+                               continue;  /* map would run into the initrd */
+                       }
+
+                       /* OK, it doesn't overlap anything, use it.  */
+#ifdef CONFIG_DEBUG_BOOTMEM
+                       prom_printf("choose_bootmap_pfn: Using %lx [%lx]\n",
+                                   start >> PAGE_SHIFT, start);
+#endif
+                       return start >> PAGE_SHIFT;
+               }
+       }
+
+       prom_printf("Cannot find free area for bootmap, aborting.\n");
+       prom_halt();  /* presumably never returns -- confirm */
+}
+
+static unsigned long __init bootmem_init(unsigned long *pages_avail,
+                                        unsigned long phys_base)
+{
+       unsigned long bootmap_size, end_pfn;
        unsigned long end_of_phys_memory = 0UL;
        unsigned long bootmap_pfn, bytes_avail, size;
        int i;
@@ -813,14 +870,6 @@ unsigned long __init bootmem_init(unsigned long *pages_avail)
 
        *pages_avail = bytes_avail >> PAGE_SHIFT;
 
-       /* Start with page aligned address of last symbol in kernel
-        * image.  The kernel is hard mapped below PAGE_OFFSET in a
-        * 4MB locked TLB translation.
-        */
-       start_pfn = PAGE_ALIGN(kern_base + kern_size) >> PAGE_SHIFT;
-
-       bootmap_pfn = start_pfn;
-
        end_pfn = end_of_phys_memory >> PAGE_SHIFT;
 
 #ifdef CONFIG_BLK_DEV_INITRD
@@ -837,23 +886,23 @@ unsigned long __init bootmem_init(unsigned long *pages_avail)
                                         "(0x%016lx > 0x%016lx)\ndisabling initrd\n",
                               initrd_end, end_of_phys_memory);
                        initrd_start = 0;
-               }
-               if (initrd_start) {
-                       if (initrd_start >= (start_pfn << PAGE_SHIFT) &&
-                           initrd_start < (start_pfn << PAGE_SHIFT) + 2 * PAGE_SIZE)
-                               bootmap_pfn = PAGE_ALIGN (initrd_end) >> PAGE_SHIFT;
+                       initrd_end = 0;
                }
        }
 #endif 
        /* Initialize the boot-time allocator. */
        max_pfn = max_low_pfn = end_pfn;
-       min_low_pfn = pfn_base;
+       min_low_pfn = (phys_base >> PAGE_SHIFT);
+
+       bootmap_pfn = choose_bootmap_pfn(min_low_pfn, end_pfn);
 
 #ifdef CONFIG_DEBUG_BOOTMEM
        prom_printf("init_bootmem(min[%lx], bootmap[%lx], max[%lx])\n",
                    min_low_pfn, bootmap_pfn, max_low_pfn);
 #endif
-       bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap_pfn, pfn_base, end_pfn);
+       bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap_pfn,
+                                        (phys_base >> PAGE_SHIFT),
+                                        end_pfn);
 
        /* Now register the available physical memory with the
         * allocator.
@@ -901,6 +950,20 @@ unsigned long __init bootmem_init(unsigned long *pages_avail)
        reserve_bootmem((bootmap_pfn << PAGE_SHIFT), size);
        *pages_avail -= PAGE_ALIGN(size) >> PAGE_SHIFT;
 
+       for (i = 0; i < pavail_ents; i++) {
+               unsigned long start_pfn, end_pfn;
+
+               start_pfn = pavail[i].phys_addr >> PAGE_SHIFT;
+               end_pfn = (start_pfn + (pavail[i].reg_size >> PAGE_SHIFT));
+#ifdef CONFIG_DEBUG_BOOTMEM
+               prom_printf("memory_present(0, %lx, %lx)\n",
+                           start_pfn, end_pfn);
+#endif
+               memory_present(0, start_pfn, end_pfn);
+       }
+
+       sparse_init();
+
        return end_pfn;
 }
 
@@ -1180,7 +1243,7 @@ static void sun4v_pgprot_init(void);
 
 void __init paging_init(void)
 {
-       unsigned long end_pfn, pages_avail, shift;
+       unsigned long end_pfn, pages_avail, shift, phys_base;
        unsigned long real_end, i;
 
        kern_base = (prom_boot_mapping_phys_low >> 22UL) << 22UL;
@@ -1211,8 +1274,6 @@ void __init paging_init(void)
        for (i = 0; i < pavail_ents; i++)
                phys_base = min(phys_base, pavail[i].phys_addr);
 
-       pfn_base = phys_base >> PAGE_SHIFT;
-
        set_bit(0, mmu_context_bmap);
 
        shift = kern_base + PAGE_OFFSET - ((unsigned long)KERNBASE);
@@ -1248,7 +1309,9 @@ void __init paging_init(void)
 
        /* Setup bootmem... */
        pages_avail = 0;
-       last_valid_pfn = end_pfn = bootmem_init(&pages_avail);
+       last_valid_pfn = end_pfn = bootmem_init(&pages_avail, phys_base);
+
+       max_mapnr = last_valid_pfn - (phys_base >> PAGE_SHIFT);
 
        kernel_physical_mapping_init();
 
@@ -1261,7 +1324,7 @@ void __init paging_init(void)
                for (znum = 0; znum < MAX_NR_ZONES; znum++)
                        zones_size[znum] = zholes_size[znum] = 0;
 
-               npages = end_pfn - pfn_base;
+               npages = end_pfn - (phys_base >> PAGE_SHIFT);
                zones_size[ZONE_DMA] = npages;
                zholes_size[ZONE_DMA] = npages - pages_avail;
 
@@ -1336,7 +1399,6 @@ void __init mem_init(void)
 
        taint_real_pages();
 
-       max_mapnr = last_valid_pfn - pfn_base;
        high_memory = __va(last_valid_pfn << PAGE_SHIFT);
 
 #ifdef CONFIG_DEBUG_BOOTMEM
diff --git a/include/asm-sparc64/numnodes.h b/include/asm-sparc64/numnodes.h
new file mode 100644 (file)
index 0000000..017e7e7
--- /dev/null
@@ -0,0 +1,6 @@
+#ifndef _SPARC64_NUMNODES_H
+#define _SPARC64_NUMNODES_H
+
+#define NODES_SHIFT    0       /* presumably log2 of the node count: 1 node */
+
+#endif /* !(_SPARC64_NUMNODES_H) */
index c277ac5..f6b4925 100644 (file)
@@ -125,17 +125,10 @@ typedef unsigned long pgprot_t;
 #define __pa(x)                        ((unsigned long)(x) - PAGE_OFFSET)
 #define __va(x)                        ((void *)((unsigned long) (x) + PAGE_OFFSET))
 
-/* PFNs are real physical page numbers.  However, mem_map only begins to record
- * per-page information starting at pfn_base.  This is to handle systems where
- * the first physical page in the machine is at some huge physical address,
- * such as 4GB.   This is common on a partitioned E10000, for example.
- */
-extern struct page *pfn_to_page(unsigned long pfn);
-extern unsigned long page_to_pfn(struct page *);
+#define pfn_to_kaddr(pfn)      __va((pfn) << PAGE_SHIFT)
 
 #define virt_to_page(kaddr)    pfn_to_page(__pa(kaddr)>>PAGE_SHIFT)
 
-#define pfn_valid(pfn)         (((pfn)-(pfn_base)) < max_mapnr)
 #define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
 
 #define virt_to_phys __pa
index 75a2cd2..d427ce6 100644 (file)
@@ -217,9 +217,6 @@ extern unsigned long pg_iobits;
 extern unsigned long _PAGE_ALL_SZ_BITS;
 extern unsigned long _PAGE_SZBITS;
 
-extern unsigned long phys_base;
-extern unsigned long pfn_base;
-
 extern struct page *mem_map_zero;
 #define ZERO_PAGE(vaddr)       (mem_map_zero)
 
diff --git a/include/asm-sparc64/sparsemem.h b/include/asm-sparc64/sparsemem.h
new file mode 100644 (file)
index 0000000..ed5c9d8
--- /dev/null
@@ -0,0 +1,12 @@
+#ifndef _SPARC64_SPARSEMEM_H
+#define _SPARC64_SPARSEMEM_H
+
+#ifdef __KERNEL__
+
+#define SECTION_SIZE_BITS       26     /* 1UL << 26 = 64MB per section */
+#define MAX_PHYSADDR_BITS       42     /* presumably widest physical address */
+#define MAX_PHYSMEM_BITS        42     /* presumably widest address with RAM */
+
+#endif /* __KERNEL__ */
+
+#endif /* !(_SPARC64_SPARSEMEM_H) */