[PATCH] memory hotplug locking: node_size_lock
Dave Hansen [Sun, 30 Oct 2005 01:16:52 +0000 (18:16 -0700)]
pgdat->node_size_lock is basically only needed in one place in the normal
code: show_mem(), which is the arch-specific sysrq-m printing function.

Strictly speaking, the architectures not doing memory hotplug do not need this
locking in show_mem().  However, they are all included for completeness.  This
should also make any future consolidation of all of the implementations a
little more straightforward.

This lock is also held in the sparsemem code during a memory removal, as
sections are invalidated.  This is the place where pfn_valid() is made false
for a memory area that's being removed.  The lock is only required when doing
pfn_valid() operations on memory whose pages the caller does not already hold
a reference on, such as in show_mem().

Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

arch/alpha/mm/numa.c
arch/i386/mm/pgtable.c
arch/ia64/mm/discontig.c
arch/m32r/mm/init.c
arch/parisc/mm/init.c
arch/ppc64/mm/init.c
include/linux/memory_hotplug.h [new file with mode: 0644]
include/linux/mmzone.h
mm/page_alloc.c

index c7481d5..6d52512 100644 (file)
@@ -371,6 +371,8 @@ show_mem(void)
        show_free_areas();
        printk("Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
        for_each_online_node(nid) {
+               unsigned long flags;
+               pgdat_resize_lock(NODE_DATA(nid), &flags);
                i = node_spanned_pages(nid);
                while (i-- > 0) {
                        struct page *page = nid_page_nr(nid, i);
@@ -384,6 +386,7 @@ show_mem(void)
                        else
                                shared += page_count(page) - 1;
                }
+               pgdat_resize_unlock(NODE_DATA(nid), &flags);
        }
        printk("%ld pages of RAM\n",total);
        printk("%ld free pages\n",free);
index 39c099f..9db3242 100644 (file)
@@ -31,11 +31,13 @@ void show_mem(void)
        pg_data_t *pgdat;
        unsigned long i;
        struct page_state ps;
+       unsigned long flags;
 
        printk(KERN_INFO "Mem-info:\n");
        show_free_areas();
        printk(KERN_INFO "Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
        for_each_pgdat(pgdat) {
+               pgdat_resize_lock(pgdat, &flags);
                for (i = 0; i < pgdat->node_spanned_pages; ++i) {
                        page = pgdat_page_nr(pgdat, i);
                        total++;
@@ -48,6 +50,7 @@ void show_mem(void)
                        else if (page_count(page))
                                shared += page_count(page) - 1;
                }
+               pgdat_resize_unlock(pgdat, &flags);
        }
        printk(KERN_INFO "%d pages of RAM\n", total);
        printk(KERN_INFO "%d pages of HIGHMEM\n", highmem);
index a3788fb..a88cdb7 100644 (file)
@@ -555,9 +555,13 @@ void show_mem(void)
        show_free_areas();
        printk("Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
        for_each_pgdat(pgdat) {
-               unsigned long present = pgdat->node_present_pages;
+               unsigned long present;
+               unsigned long flags;
                int shared = 0, cached = 0, reserved = 0;
+
                printk("Node ID: %d\n", pgdat->node_id);
+               pgdat_resize_lock(pgdat, &flags);
+               present = pgdat->node_present_pages;
                for(i = 0; i < pgdat->node_spanned_pages; i++) {
                        struct page *page;
                        if (pfn_valid(pgdat->node_start_pfn + i))
@@ -571,6 +575,7 @@ void show_mem(void)
                        else if (page_count(page))
                                shared += page_count(page)-1;
                }
+               pgdat_resize_unlock(pgdat, &flags);
                total_present += present;
                total_reserved += reserved;
                total_cached += cached;
index d9a40b1..6facf15 100644 (file)
@@ -48,6 +48,8 @@ void show_mem(void)
        show_free_areas();
        printk("Free swap:       %6ldkB\n",nr_swap_pages<<(PAGE_SHIFT-10));
        for_each_pgdat(pgdat) {
+               unsigned long flags;
+               pgdat_resize_lock(pgdat, &flags);
                for (i = 0; i < pgdat->node_spanned_pages; ++i) {
                        page = pgdat_page_nr(pgdat, i);
                        total++;
@@ -60,6 +62,7 @@ void show_mem(void)
                        else if (page_count(page))
                                shared += page_count(page) - 1;
                }
+               pgdat_resize_unlock(pgdat, &flags);
        }
        printk("%d pages of RAM\n", total);
        printk("%d pages of HIGHMEM\n",highmem);
@@ -150,10 +153,14 @@ int __init reservedpages_count(void)
        int reservedpages, nid, i;
 
        reservedpages = 0;
-       for_each_online_node(nid)
+       for_each_online_node(nid) {
+               unsigned long flags;
+               pgdat_resize_lock(NODE_DATA(nid), &flags);
                for (i = 0 ; i < MAX_LOW_PFN(nid) - START_PFN(nid) ; i++)
                        if (PageReserved(nid_page_nr(nid, i)))
                                reservedpages++;
+               pgdat_resize_unlock(NODE_DATA(nid), &flags);
+       }
 
        return reservedpages;
 }
index 2886ad7..29b998e 100644 (file)
@@ -505,7 +505,9 @@ void show_mem(void)
 
                for (j = node_start_pfn(i); j < node_end_pfn(i); j++) {
                        struct page *p;
+                       unsigned long flags;
 
+                       pgdat_resize_lock(NODE_DATA(i), &flags);
                        p = nid_page_nr(i, j) - node_start_pfn(i);
 
                        total++;
@@ -517,6 +519,7 @@ void show_mem(void)
                                free++;
                        else
                                shared += page_count(p) - 1;
+                       pgdat_resize_unlock(NODE_DATA(i), &flags);
                }
        }
 #endif
index a45584b..975b26d 100644 (file)
@@ -104,6 +104,8 @@ void show_mem(void)
        show_free_areas();
        printk("Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
        for_each_pgdat(pgdat) {
+               unsigned long flags;
+               pgdat_resize_lock(pgdat, &flags);
                for (i = 0; i < pgdat->node_spanned_pages; i++) {
                        page = pgdat_page_nr(pgdat, i);
                        total++;
@@ -114,6 +116,7 @@ void show_mem(void)
                        else if (page_count(page))
                                shared += page_count(page) - 1;
                }
+               pgdat_resize_unlock(pgdat, &flags);
        }
        printk("%ld pages of RAM\n", total);
        printk("%ld reserved pages\n", reserved);
@@ -647,11 +650,14 @@ void __init mem_init(void)
 #endif
 
        for_each_pgdat(pgdat) {
+               unsigned long flags;
+               pgdat_resize_lock(pgdat, &flags);
                for (i = 0; i < pgdat->node_spanned_pages; i++) {
                        page = pgdat_page_nr(pgdat, i);
                        if (PageReserved(page))
                                reservedpages++;
                }
+               pgdat_resize_unlock(pgdat, &flags);
        }
 
        codesize = (unsigned long)&_etext - (unsigned long)&_stext;
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
new file mode 100644 (file)
index 0000000..e8103be
--- /dev/null
@@ -0,0 +1,34 @@
+#ifndef __LINUX_MEMORY_HOTPLUG_H
+#define __LINUX_MEMORY_HOTPLUG_H
+
+#include <linux/mmzone.h>
+#include <linux/spinlock.h>
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+/*
+ * pgdat resizing functions
+ */
+static inline
+void pgdat_resize_lock(struct pglist_data *pgdat, unsigned long *flags)
+{
+       spin_lock_irqsave(&pgdat->node_size_lock, *flags);
+}
+static inline
+void pgdat_resize_unlock(struct pglist_data *pgdat, unsigned long *flags)
+{
+       spin_unlock_irqrestore(&pgdat->node_size_lock, *flags);
+}
+static inline
+void pgdat_resize_init(struct pglist_data *pgdat)
+{
+       spin_lock_init(&pgdat->node_size_lock);
+}
+#else /* ! CONFIG_MEMORY_HOTPLUG */
+/*
+ * Stub functions for when hotplug is off
+ */
+static inline void pgdat_resize_lock(struct pglist_data *p, unsigned long *f) {}
+static inline void pgdat_resize_unlock(struct pglist_data *p, unsigned long *f) {}
+static inline void pgdat_resize_init(struct pglist_data *pgdat) {}
+#endif
+#endif /* __LINUX_MEMORY_HOTPLUG_H */
index 4674145..e050d68 100644 (file)
@@ -273,6 +273,16 @@ typedef struct pglist_data {
        struct page *node_mem_map;
 #endif
        struct bootmem_data *bdata;
+#ifdef CONFIG_MEMORY_HOTPLUG
+       /*
+        * Must be held any time you expect node_start_pfn, node_present_pages
+        * or node_spanned_pages stay constant.  Holding this will also
+        * guarantee that any pfn_valid() stays that way.
+        *
+        * Nests above zone->lock and zone->size_seqlock.
+        */
+       spinlock_t node_size_lock;
+#endif
        unsigned long node_start_pfn;
        unsigned long node_present_pages; /* total number of physical pages */
        unsigned long node_spanned_pages; /* total size of physical page
@@ -293,6 +303,8 @@ typedef struct pglist_data {
 #endif
 #define nid_page_nr(nid, pagenr)       pgdat_page_nr(NODE_DATA(nid),(pagenr))
 
+#include <linux/memory_hotplug.h>
+
 extern struct pglist_data *pgdat_list;
 
 void __get_zone_counts(unsigned long *active, unsigned long *inactive,
index a51ef94..32fad6d 100644 (file)
@@ -1958,6 +1958,7 @@ static void __init free_area_init_core(struct pglist_data *pgdat,
        int nid = pgdat->node_id;
        unsigned long zone_start_pfn = pgdat->node_start_pfn;
 
+       pgdat_resize_init(pgdat);
        pgdat->nr_zones = 0;
        init_waitqueue_head(&pgdat->kswapd_wait);
        pgdat->kswapd_max_order = 0;