Merge branch 'rcu/urgent' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck...
Ingo Molnar [Tue, 17 Jan 2012 08:44:17 +0000 (09:44 +0100)]
336 files changed:
Documentation/DocBook/debugobjects.tmpl
Documentation/kernel-parameters.txt
Documentation/trace/events.txt
arch/Kconfig
arch/arm/kernel/setup.c
arch/arm/mm/init.c
arch/cris/arch-v32/kernel/time.c
arch/ia64/Kconfig
arch/ia64/include/asm/cputime.h
arch/ia64/mm/contig.c
arch/ia64/mm/init.c
arch/m68k/platform/68328/timers.c
arch/m68k/platform/coldfire/dma_timer.c
arch/m68k/platform/coldfire/pit.c
arch/m68k/platform/coldfire/sltimers.c
arch/m68k/platform/coldfire/timers.c
arch/microblaze/include/asm/memblock.h [deleted file]
arch/microblaze/kernel/prom.c
arch/mips/Kconfig
arch/mips/kernel/setup.c
arch/mips/sgi-ip27/ip27-memory.c
arch/openrisc/include/asm/memblock.h [deleted file]
arch/openrisc/kernel/prom.c
arch/parisc/kernel/time.c
arch/powerpc/Kconfig
arch/powerpc/include/asm/cputime.h
arch/powerpc/include/asm/memblock.h [deleted file]
arch/powerpc/kernel/machine_kexec.c
arch/powerpc/kernel/prom.c
arch/powerpc/mm/init_32.c
arch/powerpc/mm/mem.c
arch/powerpc/mm/numa.c
arch/powerpc/mm/tlb_nohash.c
arch/powerpc/platforms/embedded6xx/wii.c
arch/powerpc/platforms/ps3/mm.c
arch/s390/Kconfig
arch/s390/appldata/appldata_os.c
arch/s390/include/asm/cputime.h
arch/s390/kernel/setup.c
arch/s390/oprofile/hwsampler.c
arch/s390/oprofile/init.c
arch/s390/oprofile/op_counter.h [new file with mode: 0644]
arch/score/Kconfig
arch/score/kernel/setup.c
arch/sh/Kconfig
arch/sh/include/asm/memblock.h [deleted file]
arch/sh/kernel/machine_kexec.c
arch/sh/kernel/setup.c
arch/sh/mm/Kconfig
arch/sh/mm/init.c
arch/sparc/Kconfig
arch/sparc/include/asm/memblock.h [deleted file]
arch/sparc/mm/init_64.c
arch/um/kernel/time.c
arch/unicore32/kernel/setup.c
arch/unicore32/mm/init.c
arch/unicore32/mm/mmu.c
arch/x86/Kconfig
arch/x86/ia32/ia32entry.S
arch/x86/include/asm/alternative-asm.h
arch/x86/include/asm/apic.h
arch/x86/include/asm/apic_flat_64.h [new file with mode: 0644]
arch/x86/include/asm/apicdef.h
arch/x86/include/asm/bitops.h
arch/x86/include/asm/cmpxchg.h
arch/x86/include/asm/cmpxchg_32.h
arch/x86/include/asm/cmpxchg_64.h
arch/x86/include/asm/div64.h
arch/x86/include/asm/e820.h
arch/x86/include/asm/hardirq.h
arch/x86/include/asm/i387.h
arch/x86/include/asm/insn.h
arch/x86/include/asm/mach_timer.h
arch/x86/include/asm/mc146818rtc.h
arch/x86/include/asm/mce.h
arch/x86/include/asm/memblock.h [deleted file]
arch/x86/include/asm/microcode.h
arch/x86/include/asm/numachip/numachip_csr.h [new file with mode: 0644]
arch/x86/include/asm/percpu.h
arch/x86/include/asm/perf_event.h
arch/x86/include/asm/pgtable.h
arch/x86/include/asm/processor-flags.h
arch/x86/include/asm/processor.h
arch/x86/include/asm/spinlock.h
arch/x86/include/asm/thread_info.h
arch/x86/include/asm/topology.h
arch/x86/include/asm/tsc.h
arch/x86/include/asm/uaccess.h
arch/x86/include/asm/x86_init.h
arch/x86/kernel/acpi/boot.c
arch/x86/kernel/amd_nb.c
arch/x86/kernel/aperture_64.c
arch/x86/kernel/apic/Makefile
arch/x86/kernel/apic/apic.c
arch/x86/kernel/apic/apic_flat_64.c
arch/x86/kernel/apic/apic_numachip.c [new file with mode: 0644]
arch/x86/kernel/apic/io_apic.c
arch/x86/kernel/check.c
arch/x86/kernel/cpu/amd.c
arch/x86/kernel/cpu/centaur.c
arch/x86/kernel/cpu/common.c
arch/x86/kernel/cpu/cpu.h
arch/x86/kernel/cpu/intel.c
arch/x86/kernel/cpu/mcheck/mce-inject.c
arch/x86/kernel/cpu/mcheck/mce.c
arch/x86/kernel/cpu/mcheck/mce_amd.c
arch/x86/kernel/cpu/mcheck/therm_throt.c
arch/x86/kernel/cpu/perf_event.c
arch/x86/kernel/cpu/perf_event.h
arch/x86/kernel/cpu/perf_event_amd.c
arch/x86/kernel/cpu/perf_event_intel.c
arch/x86/kernel/cpu/powerflags.c
arch/x86/kernel/cpu/proc.c
arch/x86/kernel/e820.c
arch/x86/kernel/entry_32.S
arch/x86/kernel/entry_64.S
arch/x86/kernel/head.c
arch/x86/kernel/head32.c
arch/x86/kernel/head64.c
arch/x86/kernel/hpet.c
arch/x86/kernel/irq.c
arch/x86/kernel/jump_label.c
arch/x86/kernel/microcode_amd.c
arch/x86/kernel/microcode_core.c
arch/x86/kernel/mpparse.c
arch/x86/kernel/process.c
arch/x86/kernel/process_64.c
arch/x86/kernel/ptrace.c
arch/x86/kernel/setup.c
arch/x86/kernel/smpboot.c
arch/x86/kernel/trampoline.c
arch/x86/kernel/traps.c
arch/x86/kernel/tsc.c
arch/x86/kernel/tsc_sync.c
arch/x86/kernel/vsyscall_64.c
arch/x86/kernel/x86_init.c
arch/x86/lib/inat.c
arch/x86/lib/insn.c
arch/x86/lib/string_32.c
arch/x86/lib/x86-opcode-map.txt
arch/x86/mm/Makefile
arch/x86/mm/extable.c
arch/x86/mm/fault.c
arch/x86/mm/init.c
arch/x86/mm/init_32.c
arch/x86/mm/init_64.c
arch/x86/mm/memblock.c [deleted file]
arch/x86/mm/memtest.c
arch/x86/mm/numa.c
arch/x86/mm/numa_32.c
arch/x86/mm/numa_64.c
arch/x86/mm/numa_emulation.c
arch/x86/mm/pageattr.c
arch/x86/mm/srat.c
arch/x86/oprofile/Makefile
arch/x86/oprofile/init.c
arch/x86/oprofile/nmi_int.c
arch/x86/oprofile/nmi_timer_int.c [deleted file]
arch/x86/platform/efi/efi.c
arch/x86/tools/Makefile
arch/x86/tools/gen-insn-attr-x86.awk
arch/x86/tools/insn_sanity.c [new file with mode: 0644]
arch/x86/xen/enlighten.c
arch/x86/xen/mmu.c
arch/x86/xen/setup.c
arch/xtensa/kernel/time.c
block/ioctl.c
drivers/char/random.c
drivers/clocksource/acpi_pm.c
drivers/clocksource/i8253.c
drivers/clocksource/tcb_clksrc.c
drivers/cpufreq/cpufreq_conservative.c
drivers/cpufreq/cpufreq_ondemand.c
drivers/cpufreq/cpufreq_stats.c
drivers/edac/i7core_edac.c
drivers/edac/mce_amd.c
drivers/edac/sb_edac.c
drivers/hwmon/coretemp.c
drivers/iommu/intel-iommu.c
drivers/lguest/x86/core.c
drivers/macintosh/rack-meter.c
drivers/oprofile/nmi_timer_int.c [new file with mode: 0644]
drivers/oprofile/oprof.c
drivers/oprofile/oprof.h
drivers/oprofile/timer_int.c
drivers/pci/Kconfig
drivers/pci/ioapic.c
fs/compat_ioctl.c
fs/ioctl.c
fs/proc/array.c
fs/proc/stat.c
fs/proc/uptime.c
include/asm-generic/cputime.h
include/linux/bitops.h
include/linux/bootmem.h
include/linux/debugobjects.h
include/linux/jump_label.h
include/linux/kernel_stat.h
include/linux/latencytop.h
include/linux/lockdep.h
include/linux/memblock.h
include/linux/mm.h
include/linux/mmzone.h
include/linux/perf_event.h
include/linux/poison.h
include/linux/sched.h
include/linux/wait.h
include/trace/events/sched.h
init/main.c
kernel/Makefile
kernel/acct.c
kernel/cpu.c
kernel/events/Makefile
kernel/events/callchain.c [new file with mode: 0644]
kernel/events/core.c
kernel/events/internal.h
kernel/exit.c
kernel/fork.c
kernel/itimer.c
kernel/jump_label.c
kernel/lockdep.c
kernel/panic.c
kernel/posix-cpu-timers.c
kernel/printk.c
kernel/rtmutex-debug.c
kernel/sched/Makefile [new file with mode: 0644]
kernel/sched/auto_group.c [moved from kernel/sched_autogroup.c with 88% similarity]
kernel/sched/auto_group.h [moved from kernel/sched_autogroup.h with 66% similarity]
kernel/sched/clock.c [moved from kernel/sched_clock.c with 100% similarity]
kernel/sched/core.c [moved from kernel/sched.c with 79% similarity]
kernel/sched/cpupri.c [moved from kernel/sched_cpupri.c with 99% similarity]
kernel/sched/cpupri.h [moved from kernel/sched_cpupri.h with 100% similarity]
kernel/sched/debug.c [moved from kernel/sched_debug.c with 99% similarity]
kernel/sched/fair.c [moved from kernel/sched_fair.c with 87% similarity]
kernel/sched/features.h [moved from kernel/sched_features.h with 75% similarity]
kernel/sched/idle_task.c [moved from kernel/sched_idletask.c with 96% similarity]
kernel/sched/rt.c [moved from kernel/sched_rt.c with 90% similarity]
kernel/sched/sched.h [new file with mode: 0644]
kernel/sched/stats.c [new file with mode: 0644]
kernel/sched/stats.h [moved from kernel/sched_stats.h with 70% similarity]
kernel/sched/stop_task.c [moved from kernel/sched_stoptask.c with 97% similarity]
kernel/signal.c
kernel/sys.c
kernel/time/tick-sched.c
kernel/time/timekeeping.c
kernel/timer.c
kernel/trace/trace.c
kernel/trace/trace.h
kernel/trace/trace_events_filter.c
kernel/trace/trace_irqsoff.c
kernel/trace/trace_output.c
kernel/trace/trace_sched_wakeup.c
kernel/tsacct.c
kernel/wait.c
lib/debugobjects.c
mm/Kconfig
mm/memblock.c
mm/nobootmem.c
mm/page_alloc.c
mm/slub.c
net/socket.c
tools/perf/Documentation/perf-annotate.txt
tools/perf/Documentation/perf-buildid-list.txt
tools/perf/Documentation/perf-evlist.txt
tools/perf/Documentation/perf-kmem.txt
tools/perf/Documentation/perf-lock.txt
tools/perf/Documentation/perf-record.txt
tools/perf/Documentation/perf-report.txt
tools/perf/Documentation/perf-sched.txt
tools/perf/Documentation/perf-script.txt
tools/perf/Documentation/perf-test.txt
tools/perf/Documentation/perf-timechart.txt
tools/perf/Makefile
tools/perf/builtin-annotate.c
tools/perf/builtin-buildid-list.c
tools/perf/builtin-diff.c
tools/perf/builtin-evlist.c
tools/perf/builtin-inject.c
tools/perf/builtin-kmem.c
tools/perf/builtin-kvm.c
tools/perf/builtin-lock.c
tools/perf/builtin-probe.c
tools/perf/builtin-record.c
tools/perf/builtin-report.c
tools/perf/builtin-sched.c
tools/perf/builtin-script.c
tools/perf/builtin-stat.c
tools/perf/builtin-test.c
tools/perf/builtin-timechart.c
tools/perf/builtin-top.c
tools/perf/perf.c
tools/perf/perf.h
tools/perf/util/annotate.c
tools/perf/util/annotate.h
tools/perf/util/build-id.c
tools/perf/util/build-id.h
tools/perf/util/callchain.h
tools/perf/util/cgroup.c
tools/perf/util/config.c
tools/perf/util/debugfs.c
tools/perf/util/debugfs.h
tools/perf/util/event.c
tools/perf/util/event.h
tools/perf/util/evlist.c
tools/perf/util/evlist.h
tools/perf/util/evsel.c
tools/perf/util/evsel.h
tools/perf/util/header.c
tools/perf/util/header.h
tools/perf/util/hist.h
tools/perf/util/include/linux/bitops.h
tools/perf/util/map.c
tools/perf/util/map.h
tools/perf/util/parse-events.c
tools/perf/util/parse-events.h
tools/perf/util/probe-finder.h
tools/perf/util/scripting-engines/trace-event-perl.c
tools/perf/util/scripting-engines/trace-event-python.c
tools/perf/util/session.c
tools/perf/util/session.h
tools/perf/util/setup.py
tools/perf/util/symbol.c
tools/perf/util/symbol.h
tools/perf/util/thread.c
tools/perf/util/thread.h
tools/perf/util/tool.h [new file with mode: 0644]
tools/perf/util/top.h
tools/perf/util/trace-event-info.c
tools/perf/util/trace-event-scripting.c
tools/perf/util/trace-event.h
tools/perf/util/ui/browsers/annotate.c
tools/perf/util/ui/browsers/hists.c
tools/perf/util/ui/progress.c
tools/perf/util/usage.c
tools/perf/util/util.h
tools/perf/util/values.c

diff --git a/Documentation/DocBook/debugobjects.tmpl b/Documentation/DocBook/debugobjects.tmpl
index 08ff908..24979f6 100644
@@ -96,6 +96,7 @@
        <listitem><para>debug_object_deactivate</para></listitem>
        <listitem><para>debug_object_destroy</para></listitem>
        <listitem><para>debug_object_free</para></listitem>
+       <listitem><para>debug_object_assert_init</para></listitem>
       </itemizedlist>
       Each of these functions takes the address of the real object and
       a pointer to the object type specific debug description
        debug checks.
       </para>
     </sect1>
+
+    <sect1 id="debug_object_assert_init">
+      <title>debug_object_assert_init</title>
+      <para>
+       This function is called to assert that an object has been
+       initialized.
+      </para>
+      <para>
+       When the real object is not tracked by debugobjects, it calls
+       fixup_assert_init of the object type description structure
+       provided by the caller, with the hardcoded object state
+       ODEBUG_STATE_NOTAVAILABLE. The fixup function can correct the problem
+       by calling debug_object_init and other specific initializing
+       functions.
+      </para>
+      <para>
+       When the real object is already tracked by debugobjects it is
+       ignored.
+      </para>
+    </sect1>
   </chapter>
   <chapter id="fixupfunctions">
     <title>Fixup functions</title>
        statistics.
       </para>
     </sect1>
+    <sect1 id="fixup_assert_init">
+      <title>fixup_assert_init</title>
+      <para>
+       This function is called from the debug code whenever a problem
+       in debug_object_assert_init is detected.
+      </para>
+      <para>
+       Called from debug_object_assert_init() with a hardcoded state
+       ODEBUG_STATE_NOTAVAILABLE when the object is not found in the
+       debug bucket.
+      </para>
+      <para>
+       The function returns 1 when the fixup was successful,
+       otherwise 0. The return value is used to update the
+       statistics.
+      </para>
+      <para>
+       Note, this function should make sure debug_object_init() is
+       called before returning.
+      </para>
+      <para>
+       The handling of statically initialized objects is a special
+       case. The fixup function should check if this is a legitimate
+       case of a statically initialized object or not. In this case only
+       debug_object_init() should be called to make the object known to
+       the tracker. Then the function should return 0 because this is not
+       a real fixup.
+      </para>
+    </sect1>
   </chapter>
   <chapter id="bugs">
     <title>Known Bugs And Assumptions</title>
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 81c287f..e229769 100644
@@ -1885,6 +1885,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                        arch_perfmon: [X86] Force use of architectural
                                perfmon on Intel CPUs instead of the
                                CPU specific event set.
+                       timer: [X86] Force use of architectural NMI
+                               timer mode (see also oprofile.timer
+                               for generic hr timer mode)
+                               [s390] Force legacy basic mode sampling
+                                (report cpu_type "timer")
 
        oops=panic      Always panic on oopses. Default is to just kill the
                        process, but there is a small probability of
@@ -2750,11 +2755,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                        functions are at fixed addresses, they make nice
                        targets for exploits that can control RIP.
 
-                       emulate     Vsyscalls turn into traps and are emulated
-                                   reasonably safely.
+                       emulate     [default] Vsyscalls turn into traps and are
+                                   emulated reasonably safely.
 
-                       native      [default] Vsyscalls are native syscall
-                                   instructions.
+                       native      Vsyscalls are native syscall instructions.
                                    This is a little bit faster than trapping
                                    and makes a few dynamic recompilers work
                                    better than they would in emulation mode.
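The exploit concern that motivates making emulate the default is that the legacy vsyscall entry points live at a fixed, universally known address. A hedged userspace illustration (0xffffffffff600000 is the historical x86-64 vsyscall page; real programs should use the vDSO instead):

    #include <sys/time.h>

    #define VGETTIMEOFDAY 0xffffffffff600000UL  /* legacy gettimeofday entry */

    int main(void)
    {
            struct timeval tv;
            int (*vgtod)(struct timeval *, struct timezone *) =
                    (void *) VGETTIMEOFDAY;

            /* under vsyscall=emulate this traps and is emulated safely */
            return vgtod(&tv, 0);
    }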
diff --git a/Documentation/trace/events.txt b/Documentation/trace/events.txt
index b510564..bb24c2a 100644
@@ -191,8 +191,6 @@ And for string fields they are:
 
 Currently, only exact string matches are supported.
 
-Currently, the maximum number of predicates in a filter is 16.
-
 5.2 Setting filters
 -------------------
 
diff --git a/arch/Kconfig b/arch/Kconfig
index 4b0669c..2505740 100644
@@ -30,6 +30,10 @@ config OPROFILE_EVENT_MULTIPLEX
 config HAVE_OPROFILE
        bool
 
+config OPROFILE_NMI_TIMER
+       def_bool y
+       depends on PERF_EVENTS && HAVE_PERF_EVENTS_NMI
+
 config KPROBES
        bool "Kprobes"
        depends on MODULES
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 8fc2c8f..c0b59bf 100644
@@ -52,6 +52,7 @@
 #include <asm/mach/time.h>
 #include <asm/traps.h>
 #include <asm/unwind.h>
+#include <asm/memblock.h>
 
 #if defined(CONFIG_DEPRECATED_PARAM_STRUCT)
 #include "compat.h"
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index fbdd12e..7c38474 100644
@@ -32,6 +32,7 @@
 
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
+#include <asm/memblock.h>
 
 #include "mm.h"
 
@@ -332,7 +333,6 @@ void __init arm_memblock_init(struct meminfo *mi, struct machine_desc *mdesc)
 
        sort(&meminfo.bank, meminfo.nr_banks, sizeof(meminfo.bank[0]), meminfo_cmp, NULL);
 
-       memblock_init();
        for (i = 0; i < mi->nr_banks; i++)
                memblock_add(mi->bank[i].start, mi->bank[i].size);
 
@@ -371,7 +371,7 @@ void __init arm_memblock_init(struct meminfo *mi, struct machine_desc *mdesc)
        if (mdesc->reserve)
                mdesc->reserve();
 
-       memblock_analyze();
+       memblock_allow_resize();
        memblock_dump_all();
 }
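This arm change shows the memblock API shift that runs through the whole merge: memblock_init() and memblock_analyze() are gone, and memblock_allow_resize() is called once, after all early regions are registered. A minimal sketch of the resulting boot-time sequence (addresses and sizes are placeholders):

    void __init example_memblock_setup(void)
    {
            /* no memblock_init() needed anymore - just add RAM banks */
            memblock_add(0x80000000, SZ_256M);

            /* carve out firmware/boot regions before any allocation */
            memblock_reserve(0x80000000, SZ_1M);

            /* resizing the region arrays (which may allocate) is only
             * safe from here on; this replaces memblock_analyze() */
            memblock_allow_resize();
            memblock_dump_all();
    }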
 
diff --git a/arch/cris/arch-v32/kernel/time.c b/arch/cris/arch-v32/kernel/time.c
index bb978ed..6773fc8 100644
@@ -47,14 +47,12 @@ static struct clocksource cont_rotime = {
        .rating = 300,
        .read   = read_cont_rotime,
        .mask   = CLOCKSOURCE_MASK(32),
-       .shift  = 10,
        .flags  = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
 static int __init etrax_init_cont_rotime(void)
 {
-       cont_rotime.mult = clocksource_khz2mult(100000, cont_rotime.shift);
-       clocksource_register(&cont_rotime);
+       clocksource_register_khz(&cont_rotime, 100000);
        return 0;
 }
 arch_initcall(etrax_init_cont_rotime);
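The cris conversion above, like the m68k and parisc timer changes below, replaces hand-picked shift values and clocksource_khz2mult()/clocksource_hz2mult() arithmetic with clocksource_register_khz()/clocksource_register_hz(), which derive mult and shift internally. A sketch of the new-style registration (the driver names and 100 MHz rate are made up):

    static cycle_t example_read_clk(struct clocksource *cs)
    {
            return readl(EXAMPLE_COUNTER_REG);  /* free-running counter */
    }

    static struct clocksource example_clk = {
            .name   = "example",
            .rating = 250,
            .read   = example_read_clk,
            .mask   = CLOCKSOURCE_MASK(32),
            .flags  = CLOCK_SOURCE_IS_CONTINUOUS,
            /* no .mult/.shift: the core computes them at registration */
    };

    static int __init example_clk_init(void)
    {
            return clocksource_register_hz(&example_clk, 100000000);
    }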
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 27489b6..3b7a7c4 100644
@@ -23,6 +23,9 @@ config IA64
        select HAVE_ARCH_TRACEHOOK
        select HAVE_DMA_API_DEBUG
        select HAVE_GENERIC_HARDIRQS
+       select HAVE_MEMBLOCK
+       select HAVE_MEMBLOCK_NODE_MAP
+       select ARCH_DISCARD_MEMBLOCK
        select GENERIC_IRQ_PROBE
        select GENERIC_PENDING_IRQ if SMP
        select IRQ_PER_CPU
@@ -474,9 +477,6 @@ config NODES_SHIFT
          MAX_NUMNODES will be 2^(This value).
          If in doubt, use the default.
 
-config ARCH_POPULATES_NODE_MAP
-       def_bool y
-
 # VIRTUAL_MEM_MAP and FLAT_NODE_MEM_MAP are functionally equivalent.
 # VIRTUAL_MEM_MAP has been retained for historical reasons.
 config VIRTUAL_MEM_MAP
diff --git a/arch/ia64/include/asm/cputime.h b/arch/ia64/include/asm/cputime.h
index 5a274af..3deac95 100644
 #include <linux/jiffies.h>
 #include <asm/processor.h>
 
-typedef u64 cputime_t;
-typedef u64 cputime64_t;
+typedef u64 __nocast cputime_t;
+typedef u64 __nocast cputime64_t;
 
-#define cputime_zero                   ((cputime_t)0)
 #define cputime_one_jiffy              jiffies_to_cputime(1)
-#define cputime_max                    ((~((cputime_t)0) >> 1) - 1)
-#define cputime_add(__a, __b)          ((__a) +  (__b))
-#define cputime_sub(__a, __b)          ((__a) -  (__b))
-#define cputime_div(__a, __n)          ((__a) /  (__n))
-#define cputime_halve(__a)             ((__a) >> 1)
-#define cputime_eq(__a, __b)           ((__a) == (__b))
-#define cputime_gt(__a, __b)           ((__a) >  (__b))
-#define cputime_ge(__a, __b)           ((__a) >= (__b))
-#define cputime_lt(__a, __b)           ((__a) <  (__b))
-#define cputime_le(__a, __b)           ((__a) <= (__b))
-
-#define cputime64_zero                 ((cputime64_t)0)
-#define cputime64_add(__a, __b)                ((__a) + (__b))
-#define cputime64_sub(__a, __b)                ((__a) - (__b))
-#define cputime_to_cputime64(__ct)     (__ct)
 
 /*
  * Convert cputime <-> jiffies (HZ)
  */
-#define cputime_to_jiffies(__ct)       ((__ct) / (NSEC_PER_SEC / HZ))
-#define jiffies_to_cputime(__jif)      ((__jif) * (NSEC_PER_SEC / HZ))
-#define cputime64_to_jiffies64(__ct)   ((__ct) / (NSEC_PER_SEC / HZ))
-#define jiffies64_to_cputime64(__jif)  ((__jif) * (NSEC_PER_SEC / HZ))
+#define cputime_to_jiffies(__ct)       \
+       ((__force u64)(__ct) / (NSEC_PER_SEC / HZ))
+#define jiffies_to_cputime(__jif)      \
+       (__force cputime_t)((__jif) * (NSEC_PER_SEC / HZ))
+#define cputime64_to_jiffies64(__ct)   \
+       ((__force u64)(__ct) / (NSEC_PER_SEC / HZ))
+#define jiffies64_to_cputime64(__jif)  \
+       (__force cputime64_t)((__jif) * (NSEC_PER_SEC / HZ))
 
 /*
  * Convert cputime <-> microseconds
  */
-#define cputime_to_usecs(__ct)         ((__ct) / NSEC_PER_USEC)
-#define usecs_to_cputime(__usecs)      ((__usecs) * NSEC_PER_USEC)
-#define usecs_to_cputime64(__usecs)    usecs_to_cputime(__usecs)
+#define cputime_to_usecs(__ct)         \
+       ((__force u64)(__ct) / NSEC_PER_USEC)
+#define usecs_to_cputime(__usecs)      \
+       (__force cputime_t)((__usecs) * NSEC_PER_USEC)
+#define usecs_to_cputime64(__usecs)    \
+       (__force cputime64_t)((__usecs) * NSEC_PER_USEC)
 
 /*
  * Convert cputime <-> seconds
  */
-#define cputime_to_secs(__ct)          ((__ct) / NSEC_PER_SEC)
-#define secs_to_cputime(__secs)                ((__secs) * NSEC_PER_SEC)
+#define cputime_to_secs(__ct)          \
+       ((__force u64)(__ct) / NSEC_PER_SEC)
+#define secs_to_cputime(__secs)                \
+       (__force cputime_t)((__secs) * NSEC_PER_SEC)
 
 /*
  * Convert cputime <-> timespec (nsec)
  */
 static inline cputime_t timespec_to_cputime(const struct timespec *val)
 {
-       cputime_t ret = val->tv_sec * NSEC_PER_SEC;
-       return (ret + val->tv_nsec);
+       u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_nsec;
+       return (__force cputime_t) ret;
 }
 static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val)
 {
-       val->tv_sec  = ct / NSEC_PER_SEC;
-       val->tv_nsec = ct % NSEC_PER_SEC;
+       val->tv_sec  = (__force u64) ct / NSEC_PER_SEC;
+       val->tv_nsec = (__force u64) ct % NSEC_PER_SEC;
 }
 
 /*
@@ -87,25 +80,28 @@ static inline void cputime_to_timespec(const cputime_t ct, struct timespec *val)
  */
 static inline cputime_t timeval_to_cputime(struct timeval *val)
 {
-       cputime_t ret = val->tv_sec * NSEC_PER_SEC;
-       return (ret + val->tv_usec * NSEC_PER_USEC);
+       u64 ret = val->tv_sec * NSEC_PER_SEC + val->tv_usec * NSEC_PER_USEC;
+       return (__force cputime_t) ret;
 }
 static inline void cputime_to_timeval(const cputime_t ct, struct timeval *val)
 {
-       val->tv_sec = ct / NSEC_PER_SEC;
-       val->tv_usec = (ct % NSEC_PER_SEC) / NSEC_PER_USEC;
+       val->tv_sec = (__force u64) ct / NSEC_PER_SEC;
+       val->tv_usec = ((__force u64) ct % NSEC_PER_SEC) / NSEC_PER_USEC;
 }
 
 /*
  * Convert cputime <-> clock (USER_HZ)
  */
-#define cputime_to_clock_t(__ct)       ((__ct) / (NSEC_PER_SEC / USER_HZ))
-#define clock_t_to_cputime(__x)                ((__x) * (NSEC_PER_SEC / USER_HZ))
+#define cputime_to_clock_t(__ct)       \
+       ((__force u64)(__ct) / (NSEC_PER_SEC / USER_HZ))
+#define clock_t_to_cputime(__x)                \
+       (__force cputime_t)((__x) * (NSEC_PER_SEC / USER_HZ))
 
 /*
  * Convert cputime64 to clock.
  */
-#define cputime64_to_clock_t(__ct)      cputime_to_clock_t((cputime_t)__ct)
+#define cputime64_to_clock_t(__ct)     \
+       cputime_to_clock_t((__force cputime_t)__ct)
 
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING */
 #endif /* __IA64_CPUTIME_H */
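The __nocast annotation added here makes cputime_t opaque to sparse: code can no longer mix cputime values and plain integers silently, and every audited conversion point needs an explicit __force cast, as in this schematic pair of helpers (hypothetical names, using ia64's nanosecond-based cputime):

    typedef u64 __nocast cputime_t;

    /* sparse warns if a bare u64 is passed off as cputime_t ... */
    static inline cputime_t example_nsecs_to_cputime(u64 nsecs)
    {
            return (__force cputime_t) nsecs;
    }

    /* ... and likewise when converting back for plain arithmetic */
    static inline u64 example_cputime_to_nsecs(cputime_t ct)
    {
            return (__force u64) ct;
    }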
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index f114a3b..1516d1d 100644
@@ -16,6 +16,7 @@
  */
 #include <linux/bootmem.h>
 #include <linux/efi.h>
+#include <linux/memblock.h>
 #include <linux/mm.h>
 #include <linux/nmi.h>
 #include <linux/swap.h>
@@ -348,7 +349,7 @@ paging_init (void)
                printk("Virtual mem_map starts at 0x%p\n", mem_map);
        }
 #else /* !CONFIG_VIRTUAL_MEM_MAP */
-       add_active_range(0, 0, max_low_pfn);
+       memblock_add_node(0, PFN_PHYS(max_low_pfn), 0);
        free_area_init_nodes(max_zone_pfns);
 #endif /* !CONFIG_VIRTUAL_MEM_MAP */
        zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 00cb0e2..13df239 100644
@@ -10,6 +10,7 @@
 #include <linux/bootmem.h>
 #include <linux/efi.h>
 #include <linux/elf.h>
+#include <linux/memblock.h>
 #include <linux/mm.h>
 #include <linux/mmzone.h>
 #include <linux/module.h>
@@ -557,8 +558,7 @@ int __init register_active_ranges(u64 start, u64 len, int nid)
 #endif
 
        if (start < end)
-               add_active_range(nid, __pa(start) >> PAGE_SHIFT,
-                       __pa(end) >> PAGE_SHIFT);
+               memblock_add_node(__pa(start), end - start, nid);
        return 0;
 }
 
diff --git a/arch/m68k/platform/68328/timers.c b/arch/m68k/platform/68328/timers.c
index 309f725..f267886 100644
@@ -93,7 +93,6 @@ static struct clocksource m68328_clk = {
        .name   = "timer",
        .rating = 250,
        .read   = m68328_read_clk,
-       .shift  = 20,
        .mask   = CLOCKSOURCE_MASK(32),
        .flags  = CLOCK_SOURCE_IS_CONTINUOUS,
 };
@@ -115,8 +114,7 @@ void hw_timer_init(void)
 
        /* Enable timer 1 */
        TCTL |= TCTL_TEN;
-       m68328_clk.mult = clocksource_hz2mult(TICKS_PER_JIFFY*HZ, m68328_clk.shift);
-       clocksource_register(&m68328_clk);
+       clocksource_register_hz(&m68328_clk, TICKS_PER_JIFFY*HZ);
 }
 
 /***************************************************************************/
diff --git a/arch/m68k/platform/coldfire/dma_timer.c b/arch/m68k/platform/coldfire/dma_timer.c
index a5f5628..235ad57 100644
@@ -44,7 +44,6 @@ static struct clocksource clocksource_cf_dt = {
        .rating         = 200,
        .read           = cf_dt_get_cycles,
        .mask           = CLOCKSOURCE_MASK(32),
-       .shift          = 20,
        .flags          = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -60,9 +59,7 @@ static int __init  init_cf_dt_clocksource(void)
        __raw_writeb(0x00, DTER0);
        __raw_writel(0x00000000, DTRR0);
        __raw_writew(DMA_DTMR_CLK_DIV_16 | DMA_DTMR_ENABLE, DTMR0);
-       clocksource_cf_dt.mult = clocksource_hz2mult(DMA_FREQ,
-                                                    clocksource_cf_dt.shift);
-       return clocksource_register(&clocksource_cf_dt);
+       return clocksource_register_hz(&clocksource_cf_dt, DMA_FREQ);
 }
 
 arch_initcall(init_cf_dt_clocksource);
diff --git a/arch/m68k/platform/coldfire/pit.c b/arch/m68k/platform/coldfire/pit.c
index c2b9809..02663d2 100644
@@ -144,7 +144,6 @@ static struct clocksource pit_clk = {
        .name   = "pit",
        .rating = 100,
        .read   = pit_read_clk,
-       .shift  = 20,
        .mask   = CLOCKSOURCE_MASK(32),
 };
 
@@ -162,8 +161,7 @@ void hw_timer_init(void)
 
        setup_irq(MCFINT_VECBASE + MCFINT_PIT1, &pit_irq);
 
-       pit_clk.mult = clocksource_hz2mult(FREQ, pit_clk.shift);
-       clocksource_register(&pit_clk);
+       clocksource_register_hz(&pit_clk, FREQ);
 }
 
 /***************************************************************************/
diff --git a/arch/m68k/platform/coldfire/sltimers.c b/arch/m68k/platform/coldfire/sltimers.c
index 6a85daf..b7f822b 100644
@@ -114,7 +114,6 @@ static struct clocksource mcfslt_clk = {
        .name   = "slt",
        .rating = 250,
        .read   = mcfslt_read_clk,
-       .shift  = 20,
        .mask   = CLOCKSOURCE_MASK(32),
        .flags  = CLOCK_SOURCE_IS_CONTINUOUS,
 };
@@ -136,8 +135,7 @@ void hw_timer_init(void)
 
        setup_irq(MCF_IRQ_TIMER, &mcfslt_timer_irq);
 
-       mcfslt_clk.mult = clocksource_hz2mult(MCF_BUSCLK, mcfslt_clk.shift);
-       clocksource_register(&mcfslt_clk);
+       clocksource_register_hz(&mcfslt_clk, MCF_BUSCLK);
 
 #ifdef CONFIG_HIGHPROFILE
        mcfslt_profile_init();
diff --git a/arch/m68k/platform/coldfire/timers.c b/arch/m68k/platform/coldfire/timers.c
index 60242f6..0d90da3 100644
@@ -88,7 +88,6 @@ static struct clocksource mcftmr_clk = {
        .name   = "tmr",
        .rating = 250,
        .read   = mcftmr_read_clk,
-       .shift  = 20,
        .mask   = CLOCKSOURCE_MASK(32),
        .flags  = CLOCK_SOURCE_IS_CONTINUOUS,
 };
@@ -109,8 +108,7 @@ void hw_timer_init(void)
        __raw_writew(MCFTIMER_TMR_ENORI | MCFTIMER_TMR_CLK16 |
                MCFTIMER_TMR_RESTART | MCFTIMER_TMR_ENABLE, TA(MCFTIMER_TMR));
 
-       mcftmr_clk.mult = clocksource_hz2mult(FREQ, mcftmr_clk.shift);
-       clocksource_register(&mcftmr_clk);
+       clocksource_register_hz(&mcftmr_clk, FREQ);
 
        setup_irq(MCF_IRQ_TIMER, &mcftmr_timer_irq);
 
diff --git a/arch/microblaze/include/asm/memblock.h b/arch/microblaze/include/asm/memblock.h
deleted file mode 100644
index 20a8e25..0000000
+++ /dev/null
@@ -1,14 +0,0 @@
-/*
- * Copyright (C) 2008 Michal Simek <monstr@monstr.eu>
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-
-#ifndef _ASM_MICROBLAZE_MEMBLOCK_H
-#define _ASM_MICROBLAZE_MEMBLOCK_H
-
-#endif /* _ASM_MICROBLAZE_MEMBLOCK_H */
-
-
diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c
index 977484a..80d314e 100644
@@ -122,7 +122,6 @@ void __init early_init_devtree(void *params)
        of_scan_flat_dt(early_init_dt_scan_chosen, cmd_line);
 
        /* Scan memory nodes and rebuild MEMBLOCKs */
-       memblock_init();
        of_scan_flat_dt(early_init_dt_scan_root, NULL);
        of_scan_flat_dt(early_init_dt_scan_memory, NULL);
 
@@ -130,7 +129,7 @@ void __init early_init_devtree(void *params)
        strlcpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE);
        parse_early_param();
 
-       memblock_analyze();
+       memblock_allow_resize();
 
        pr_debug("Phys. mem: %lx\n", (unsigned long) memblock_phys_mem_size());
 
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index d46f1da..9c652eb 100644
@@ -25,6 +25,9 @@ config MIPS
        select GENERIC_IRQ_SHOW
        select HAVE_ARCH_JUMP_LABEL
        select IRQ_FORCED_THREADING
+       select HAVE_MEMBLOCK
+       select HAVE_MEMBLOCK_NODE_MAP
+       select ARCH_DISCARD_MEMBLOCK
 
 menu "Machine selection"
 
@@ -2064,9 +2067,6 @@ config ARCH_DISCONTIGMEM_ENABLE
          or have huge holes in the physical address space for other reasons.
          See <file:Documentation/vm/numa> for more.
 
-config ARCH_POPULATES_NODE_MAP
-       def_bool y
-
 config ARCH_SPARSEMEM_ENABLE
        bool
        select SPARSEMEM_STATIC
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index 84af26a..b1cb8f8 100644
@@ -14,6 +14,7 @@
 #include <linux/ioport.h>
 #include <linux/export.h>
 #include <linux/screen_info.h>
+#include <linux/memblock.h>
 #include <linux/bootmem.h>
 #include <linux/initrd.h>
 #include <linux/root_dev.h>
@@ -352,7 +353,7 @@ static void __init bootmem_init(void)
                        continue;
 #endif
 
-               add_active_range(0, start, end);
+               memblock_add_node(PFN_PHYS(start), PFN_PHYS(end - start), 0);
        }
 
        /*
diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c
index bc12971..b105eca 100644
@@ -12,6 +12,7 @@
  */
 #include <linux/init.h>
 #include <linux/kernel.h>
+#include <linux/memblock.h>
 #include <linux/mm.h>
 #include <linux/mmzone.h>
 #include <linux/module.h>
@@ -381,8 +382,8 @@ static void __init szmem(void)
                                continue;
                        }
                        num_physpages += slot_psize;
-                       add_active_range(node, slot_getbasepfn(node, slot),
-                                        slot_getbasepfn(node, slot) + slot_psize);
+                       memblock_add_node(PFN_PHYS(slot_getbasepfn(node, slot)),
+                                         PFN_PHYS(slot_psize), node);
                }
        }
 }
diff --git a/arch/openrisc/include/asm/memblock.h b/arch/openrisc/include/asm/memblock.h
deleted file mode 100644
index bbe5a1c..0000000
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * OpenRISC Linux
- *
- * Linux architectural port borrowing liberally from similar works of
- * others.  All original copyrights apply as per the original source
- * declaration.
- *
- * OpenRISC implementation:
- * Copyright (C) 2003 Matjaz Breskvar <phoenix@bsemi.com>
- * Copyright (C) 2010-2011 Jonas Bonn <jonas@southpole.se>
- * et al.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#ifndef __ASM_OPENRISC_MEMBLOCK_H
-#define __ASM_OPENRISC_MEMBLOCK_H
-
-/* empty */
-
-#endif /* __ASM_OPENRISC_MEMBLOCK_H */
diff --git a/arch/openrisc/kernel/prom.c b/arch/openrisc/kernel/prom.c
index 1bb58ba..3d4478f 100644
@@ -76,14 +76,13 @@ void __init early_init_devtree(void *params)
        of_scan_flat_dt(early_init_dt_scan_chosen, cmd_line);
 
        /* Scan memory nodes and rebuild MEMBLOCKs */
-       memblock_init();
        of_scan_flat_dt(early_init_dt_scan_root, NULL);
        of_scan_flat_dt(early_init_dt_scan_memory, NULL);
 
        /* Save command line for /proc/cmdline and then parse parameters */
        strlcpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE);
 
-       memblock_analyze();
+       memblock_allow_resize();
 
        /* We must copy the flattend device tree from init memory to regular
         * memory because the device tree references the strings in it
diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c
index 45b7389..7c07743 100644
@@ -198,8 +198,6 @@ static struct clocksource clocksource_cr16 = {
        .rating                 = 300,
        .read                   = read_cr16,
        .mask                   = CLOCKSOURCE_MASK(BITS_PER_LONG),
-       .mult                   = 0, /* to be set */
-       .shift                  = 22,
        .flags                  = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -270,7 +268,5 @@ void __init time_init(void)
 
        /* register at clocksource framework */
        current_cr16_khz = PAGE0->mem_10msec/10;  /* kHz */
-       clocksource_cr16.mult = clocksource_khz2mult(current_cr16_khz,
-                                               clocksource_cr16.shift);
-       clocksource_register(&clocksource_cr16);
+       clocksource_register_khz(&clocksource_cr16, current_cr16_khz);
 }
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 951e18f..ead0bc6 100644
@@ -117,6 +117,7 @@ config PPC
        select HAVE_KRETPROBES
        select HAVE_ARCH_TRACEHOOK
        select HAVE_MEMBLOCK
+       select HAVE_MEMBLOCK_NODE_MAP
        select HAVE_DMA_ATTRS
        select HAVE_DMA_API_DEBUG
        select USE_GENERIC_SMP_HELPERS if SMP
@@ -421,9 +422,6 @@ config ARCH_SPARSEMEM_DEFAULT
        def_bool y
        depends on (SMP && PPC_PSERIES) || PPC_PS3
 
-config ARCH_POPULATES_NODE_MAP
-       def_bool y
-
 config SYS_SUPPORTS_HUGETLBFS
        bool
 
diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h
index 98b7c4b..6ec1c38 100644
@@ -29,25 +29,8 @@ static inline void setup_cputime_one_jiffy(void) { }
 #include <asm/time.h>
 #include <asm/param.h>
 
-typedef u64 cputime_t;
-typedef u64 cputime64_t;
-
-#define cputime_zero                   ((cputime_t)0)
-#define cputime_max                    ((~((cputime_t)0) >> 1) - 1)
-#define cputime_add(__a, __b)          ((__a) +  (__b))
-#define cputime_sub(__a, __b)          ((__a) -  (__b))
-#define cputime_div(__a, __n)          ((__a) /  (__n))
-#define cputime_halve(__a)             ((__a) >> 1)
-#define cputime_eq(__a, __b)           ((__a) == (__b))
-#define cputime_gt(__a, __b)           ((__a) >  (__b))
-#define cputime_ge(__a, __b)           ((__a) >= (__b))
-#define cputime_lt(__a, __b)           ((__a) <  (__b))
-#define cputime_le(__a, __b)           ((__a) <= (__b))
-
-#define cputime64_zero                 ((cputime64_t)0)
-#define cputime64_add(__a, __b)                ((__a) + (__b))
-#define cputime64_sub(__a, __b)                ((__a) - (__b))
-#define cputime_to_cputime64(__ct)     (__ct)
+typedef u64 __nocast cputime_t;
+typedef u64 __nocast cputime64_t;
 
 #ifdef __KERNEL__
 
@@ -65,7 +48,7 @@ DECLARE_PER_CPU(unsigned long, cputime_scaled_last_delta);
 
 static inline unsigned long cputime_to_jiffies(const cputime_t ct)
 {
-       return mulhdu(ct, __cputime_jiffies_factor);
+       return mulhdu((__force u64) ct, __cputime_jiffies_factor);
 }
 
 /* Estimate the scaled cputime by scaling the real cputime based on
@@ -74,14 +57,15 @@ static inline cputime_t cputime_to_scaled(const cputime_t ct)
 {
        if (cpu_has_feature(CPU_FTR_SPURR) &&
            __get_cpu_var(cputime_last_delta))
-               return ct * __get_cpu_var(cputime_scaled_last_delta) /
-                           __get_cpu_var(cputime_last_delta);
+               return (__force u64) ct *
+                       __get_cpu_var(cputime_scaled_last_delta) /
+                       __get_cpu_var(cputime_last_delta);
        return ct;
 }
 
 static inline cputime_t jiffies_to_cputime(const unsigned long jif)
 {
-       cputime_t ct;
+       u64 ct;
        unsigned long sec;
 
        /* have to be a little careful about overflow */
@@ -93,7 +77,7 @@ static inline cputime_t jiffies_to_cputime(const unsigned long jif)
        }
        if (sec)
                ct += (cputime_t) sec * tb_ticks_per_sec;
-       return ct;
+       return (__force cputime_t) ct;
 }
 
 static inline void setup_cputime_one_jiffy(void)
@@ -103,7 +87,7 @@ static inline void setup_cputime_one_jiffy(void)
 
 static inline cputime64_t jiffies64_to_cputime64(const u64 jif)
 {
-       cputime_t ct;
+       u64 ct;
        u64 sec;
 
        /* have to be a little careful about overflow */
@@ -114,13 +98,13 @@ static inline cputime64_t jiffies64_to_cputime64(const u64 jif)
                do_div(ct, HZ);
        }
        if (sec)
-               ct += (cputime_t) sec * tb_ticks_per_sec;
-       return ct;
+               ct += (u64) sec * tb_ticks_per_sec;
+       return (__force cputime64_t) ct;
 }
 
 static inline u64 cputime64_to_jiffies64(const cputime_t ct)
 {
-       return mulhdu(ct, __cputime_jiffies_factor);
+       return mulhdu((__force u64) ct, __cputime_jiffies_factor);
 }
 
 /*
@@ -130,12 +114,12 @@ extern u64 __cputime_msec_factor;
 
 static inline unsigned long cputime_to_usecs(const cputime_t ct)
 {
-       return mulhdu(ct, __cputime_msec_factor) * USEC_PER_MSEC;
+       return mulhdu((__force u64) ct, __cputime_msec_factor) * USEC_PER_MSEC;
 }
 
 static inline cputime_t usecs_to_cputime(const unsigned long us)
 {
-       cputime_t ct;
+       u64 ct;
        unsigned long sec;
 
        /* have to be a little careful about overflow */
@@ -147,7 +131,7 @@ static inline cputime_t usecs_to_cputime(const unsigned long us)
        }
        if (sec)
                ct += (cputime_t) sec * tb_ticks_per_sec;
-       return ct;
+       return (__force cputime_t) ct;
 }
 
 #define usecs_to_cputime64(us)         usecs_to_cputime(us)
@@ -159,12 +143,12 @@ extern u64 __cputime_sec_factor;
 
 static inline unsigned long cputime_to_secs(const cputime_t ct)
 {
-       return mulhdu(ct, __cputime_sec_factor);
+       return mulhdu((__force u64) ct, __cputime_sec_factor);
 }
 
 static inline cputime_t secs_to_cputime(const unsigned long sec)
 {
-       return (cputime_t) sec * tb_ticks_per_sec;
+       return (__force cputime_t)((u64) sec * tb_ticks_per_sec);
 }
 
 /*
@@ -172,7 +156,7 @@ static inline cputime_t secs_to_cputime(const unsigned long sec)
  */
 static inline void cputime_to_timespec(const cputime_t ct, struct timespec *p)
 {
-       u64 x = ct;
+       u64 x = (__force u64) ct;
        unsigned int frac;
 
        frac = do_div(x, tb_ticks_per_sec);
@@ -184,11 +168,11 @@ static inline void cputime_to_timespec(const cputime_t ct, struct timespec *p)
 
 static inline cputime_t timespec_to_cputime(const struct timespec *p)
 {
-       cputime_t ct;
+       u64 ct;
 
        ct = (u64) p->tv_nsec * tb_ticks_per_sec;
        do_div(ct, 1000000000);
-       return ct + (u64) p->tv_sec * tb_ticks_per_sec;
+       return (__force cputime_t)(ct + (u64) p->tv_sec * tb_ticks_per_sec);
 }
 
 /*
@@ -196,7 +180,7 @@ static inline cputime_t timespec_to_cputime(const struct timespec *p)
  */
 static inline void cputime_to_timeval(const cputime_t ct, struct timeval *p)
 {
-       u64 x = ct;
+       u64 x = (__force u64) ct;
        unsigned int frac;
 
        frac = do_div(x, tb_ticks_per_sec);
@@ -208,11 +192,11 @@ static inline void cputime_to_timeval(const cputime_t ct, struct timeval *p)
 
 static inline cputime_t timeval_to_cputime(const struct timeval *p)
 {
-       cputime_t ct;
+       u64 ct;
 
        ct = (u64) p->tv_usec * tb_ticks_per_sec;
        do_div(ct, 1000000);
-       return ct + (u64) p->tv_sec * tb_ticks_per_sec;
+       return (__force cputime_t)(ct + (u64) p->tv_sec * tb_ticks_per_sec);
 }
 
 /*
@@ -222,12 +206,12 @@ extern u64 __cputime_clockt_factor;
 
 static inline unsigned long cputime_to_clock_t(const cputime_t ct)
 {
-       return mulhdu(ct, __cputime_clockt_factor);
+       return mulhdu((__force u64) ct, __cputime_clockt_factor);
 }
 
 static inline cputime_t clock_t_to_cputime(const unsigned long clk)
 {
-       cputime_t ct;
+       u64 ct;
        unsigned long sec;
 
        /* have to be a little careful about overflow */
@@ -238,8 +222,8 @@ static inline cputime_t clock_t_to_cputime(const unsigned long clk)
                do_div(ct, USER_HZ);
        }
        if (sec)
-               ct += (cputime_t) sec * tb_ticks_per_sec;
-       return ct;
+               ct += (u64) sec * tb_ticks_per_sec;
+       return (__force cputime_t) ct;
 }
 
 #define cputime64_to_clock_t(ct)       cputime_to_clock_t((cputime_t)(ct))
diff --git a/arch/powerpc/include/asm/memblock.h b/arch/powerpc/include/asm/memblock.h
deleted file mode 100644
index 43efc34..0000000
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef _ASM_POWERPC_MEMBLOCK_H
-#define _ASM_POWERPC_MEMBLOCK_H
-
-#include <asm/udbg.h>
-
-#define MEMBLOCK_DBG(fmt...) udbg_printf(fmt)
-
-#endif /* _ASM_POWERPC_MEMBLOCK_H */
diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c
index 9ce1672..a2158a3 100644
@@ -107,9 +107,6 @@ void __init reserve_crashkernel(void)
        unsigned long long crash_size, crash_base;
        int ret;
 
-       /* this is necessary because of memblock_phys_mem_size() */
-       memblock_analyze();
-
        /* use common parsing */
        ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
                        &crash_size, &crash_base);
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index fa1235b..abe405d 100644
@@ -733,8 +733,6 @@ void __init early_init_devtree(void *params)
        of_scan_flat_dt(early_init_dt_scan_chosen_ppc, cmd_line);
 
        /* Scan memory nodes and rebuild MEMBLOCKs */
-       memblock_init();
-
        of_scan_flat_dt(early_init_dt_scan_root, NULL);
        of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL);
 
@@ -756,20 +754,14 @@ void __init early_init_devtree(void *params)
        early_reserve_mem();
        phyp_dump_reserve_mem();
 
-       limit = memory_limit;
-       if (! limit) {
-               phys_addr_t memsize;
-
-               /* Ensure that total memory size is page-aligned, because
-                * otherwise mark_bootmem() gets upset. */
-               memblock_analyze();
-               memsize = memblock_phys_mem_size();
-               if ((memsize & PAGE_MASK) != memsize)
-                       limit = memsize & PAGE_MASK;
-       }
+       /*
+        * Ensure that total memory size is page-aligned, because otherwise
+        * mark_bootmem() gets upset.
+        */
+       limit = ALIGN(memory_limit ?: memblock_phys_mem_size(), PAGE_SIZE);
        memblock_enforce_memory_limit(limit);
 
-       memblock_analyze();
+       memblock_allow_resize();
        memblock_dump_all();
 
        DBG("Phys. mem: %llx\n", memblock_phys_mem_size());
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 161cefd..58861fa 100644
@@ -134,8 +134,7 @@ void __init MMU_init(void)
 
        if (memblock.memory.cnt > 1) {
 #ifndef CONFIG_WII
-               memblock.memory.cnt = 1;
-               memblock_analyze();
+               memblock_enforce_memory_limit(memblock.memory.regions[0].size);
                printk(KERN_WARNING "Only using first contiguous memory region");
 #else
                wii_memory_fixups();
@@ -158,7 +157,6 @@ void __init MMU_init(void)
 #ifndef CONFIG_HIGHMEM
                total_memory = total_lowmem;
                memblock_enforce_memory_limit(total_lowmem);
-               memblock_analyze();
 #endif /* CONFIG_HIGHMEM */
        }
 
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 2dd6bdd..8e2eb66 100644
@@ -199,7 +199,7 @@ void __init do_init_bootmem(void)
                unsigned long start_pfn, end_pfn;
                start_pfn = memblock_region_memory_base_pfn(reg);
                end_pfn = memblock_region_memory_end_pfn(reg);
-               add_active_range(0, start_pfn, end_pfn);
+               memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0);
        }
 
        /* Add all physical memory to the bootmem map, mark each area
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index b22a83a..e6eea0a 100644
@@ -127,45 +127,25 @@ static int __cpuinit fake_numa_create_new_node(unsigned long end_pfn,
 }
 
 /*
- * get_active_region_work_fn - A helper function for get_node_active_region
- *     Returns datax set to the start_pfn and end_pfn if they contain
- *     the initial value of datax->start_pfn between them
- * @start_pfn: start page(inclusive) of region to check
- * @end_pfn: end page(exclusive) of region to check
- * @datax: comes in with ->start_pfn set to value to search for and
- *     goes out with active range if it contains it
- * Returns 1 if search value is in range else 0
- */
-static int __init get_active_region_work_fn(unsigned long start_pfn,
-                                       unsigned long end_pfn, void *datax)
-{
-       struct node_active_region *data;
-       data = (struct node_active_region *)datax;
-
-       if (start_pfn <= data->start_pfn && end_pfn > data->start_pfn) {
-               data->start_pfn = start_pfn;
-               data->end_pfn = end_pfn;
-               return 1;
-       }
-       return 0;
-
-}
-
-/*
- * get_node_active_region - Return active region containing start_pfn
+ * get_node_active_region - Return active region containing pfn
  * Active range returned is empty if none found.
- * @start_pfn: The page to return the region for.
- * @node_ar: Returned set to the active region containing start_pfn
+ * @pfn: The page to return the region for
+ * @node_ar: Returned set to the active region containing @pfn
  */
-static void __init get_node_active_region(unsigned long start_pfn,
-                      struct node_active_region *node_ar)
+static void __init get_node_active_region(unsigned long pfn,
+                                         struct node_active_region *node_ar)
 {
-       int nid = early_pfn_to_nid(start_pfn);
+       unsigned long start_pfn, end_pfn;
+       int i, nid;
 
-       node_ar->nid = nid;
-       node_ar->start_pfn = start_pfn;
-       node_ar->end_pfn = start_pfn;
-       work_with_active_regions(nid, get_active_region_work_fn, node_ar);
+       for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
+               if (pfn >= start_pfn && pfn < end_pfn) {
+                       node_ar->nid = nid;
+                       node_ar->start_pfn = start_pfn;
+                       node_ar->end_pfn = end_pfn;
+                       break;
+               }
+       }
 }
 
 static void map_cpu_to_node(int cpu, int node)
@@ -710,9 +690,7 @@ static void __init parse_drconf_memory(struct device_node *memory)
                        node_set_online(nid);
                        sz = numa_enforce_memory_limit(base, size);
                        if (sz)
-                               add_active_range(nid, base >> PAGE_SHIFT,
-                                                (base >> PAGE_SHIFT)
-                                                + (sz >> PAGE_SHIFT));
+                               memblock_set_node(base, sz, nid);
                } while (--ranges);
        }
 }
@@ -802,8 +780,7 @@ new_range:
                                continue;
                }
 
-               add_active_range(nid, start >> PAGE_SHIFT,
-                               (start >> PAGE_SHIFT) + (size >> PAGE_SHIFT));
+               memblock_set_node(start, size, nid);
 
                if (--ranges)
                        goto new_range;
@@ -839,7 +816,8 @@ static void __init setup_nonnuma(void)
                end_pfn = memblock_region_memory_end_pfn(reg);
 
                fake_numa_create_new_node(end_pfn, &nid);
-               add_active_range(nid, start_pfn, end_pfn);
+               memblock_set_node(PFN_PHYS(start_pfn),
+                                 PFN_PHYS(end_pfn - start_pfn), nid);
                node_set_online(nid);
        }
 }
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index 4e13d6f..573ba3b 100644
@@ -615,7 +615,6 @@ static void __early_init_mmu(int boot_cpu)
 
                /* limit memory so we dont have linear faults */
                memblock_enforce_memory_limit(linear_map_top);
-               memblock_analyze();
 
                patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e);
                patch_exception(0x1e0, exc_instruction_tlb_miss_bolted_book3e);
diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c
index 1b5dc1a..6d8dadf 100644
@@ -79,24 +79,19 @@ void __init wii_memory_fixups(void)
        BUG_ON(memblock.memory.cnt != 2);
        BUG_ON(!page_aligned(p[0].base) || !page_aligned(p[1].base));
 
-       p[0].size = _ALIGN_DOWN(p[0].size, PAGE_SIZE);
-       p[1].size = _ALIGN_DOWN(p[1].size, PAGE_SIZE);
+       /* trim unaligned tail */
+       memblock_remove(ALIGN(p[1].base + p[1].size, PAGE_SIZE),
+                       (phys_addr_t)ULLONG_MAX);
 
-       wii_hole_start = p[0].base + p[0].size;
+       /* determine hole, add & reserve them */
+       wii_hole_start = ALIGN(p[0].base + p[0].size, PAGE_SIZE);
        wii_hole_size = p[1].base - wii_hole_start;
-
-       pr_info("MEM1: <%08llx %08llx>\n", p[0].base, p[0].size);
-       pr_info("HOLE: <%08lx %08lx>\n", wii_hole_start, wii_hole_size);
-       pr_info("MEM2: <%08llx %08llx>\n", p[1].base, p[1].size);
-
-       p[0].size += wii_hole_size + p[1].size;
-
-       memblock.memory.cnt = 1;
-       memblock_analyze();
-
-       /* reserve the hole */
+       memblock_add(wii_hole_start, wii_hole_size);
        memblock_reserve(wii_hole_start, wii_hole_size);
 
+       BUG_ON(memblock.memory.cnt != 1);
+       __memblock_dump_all();
+
        /* allow ioremapping the address space in the hole */
        __allow_ioremap_reserved = 1;
 }
diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c
index 72714ad..8bd6ba5 100644
@@ -319,7 +319,6 @@ static int __init ps3_mm_add_memory(void)
        }
 
        memblock_add(start_addr, map.r1.size);
-       memblock_analyze();
 
        result = online_pages(start_pfn, nr_pages);
 
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 373679b..d48ede3 100644
@@ -92,6 +92,9 @@ config S390
        select HAVE_ARCH_JUMP_LABEL if !MARCH_G5
        select HAVE_RCU_TABLE_FREE if SMP
        select ARCH_SAVE_PAGE_KEYS if HIBERNATION
+       select HAVE_MEMBLOCK
+       select HAVE_MEMBLOCK_NODE_MAP
+       select ARCH_DISCARD_MEMBLOCK
        select ARCH_INLINE_SPIN_TRYLOCK
        select ARCH_INLINE_SPIN_TRYLOCK_BH
        select ARCH_INLINE_SPIN_LOCK
@@ -345,9 +348,6 @@ config WARN_DYNAMIC_STACK
 
          Say N if you are unsure.
 
-config ARCH_POPULATES_NODE_MAP
-       def_bool y
-
 comment "Kernel preemption"
 
 source "kernel/Kconfig.preempt"
diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c
index 92f1cb7..4de031d 100644
@@ -115,21 +115,21 @@ static void appldata_get_os_data(void *data)
        j = 0;
        for_each_online_cpu(i) {
                os_data->os_cpu[j].per_cpu_user =
-                       cputime_to_jiffies(kstat_cpu(i).cpustat.user);
+                       cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_USER]);
                os_data->os_cpu[j].per_cpu_nice =
-                       cputime_to_jiffies(kstat_cpu(i).cpustat.nice);
+                       cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_NICE]);
                os_data->os_cpu[j].per_cpu_system =
-                       cputime_to_jiffies(kstat_cpu(i).cpustat.system);
+                       cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM]);
                os_data->os_cpu[j].per_cpu_idle =
-                       cputime_to_jiffies(kstat_cpu(i).cpustat.idle);
+                       cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IDLE]);
                os_data->os_cpu[j].per_cpu_irq =
-                       cputime_to_jiffies(kstat_cpu(i).cpustat.irq);
+                       cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IRQ]);
                os_data->os_cpu[j].per_cpu_softirq =
-                       cputime_to_jiffies(kstat_cpu(i).cpustat.softirq);
+                       cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ]);
                os_data->os_cpu[j].per_cpu_iowait =
-                       cputime_to_jiffies(kstat_cpu(i).cpustat.iowait);
+                       cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IOWAIT]);
                os_data->os_cpu[j].per_cpu_steal =
-                       cputime_to_jiffies(kstat_cpu(i).cpustat.steal);
+                       cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_STEAL]);
                os_data->os_cpu[j].cpu_id = i;
                j++;
        }
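This appldata conversion tracks the kernel_stat reorganization elsewhere in the merge: per-cpu times moved from named fields of kstat_cpu(i).cpustat to a u64 array indexed by CPUTIME_* constants. A sketch of the new access pattern (the helper itself is illustrative):

    #include <linux/kernel_stat.h>

    /* combined user+nice time for one cpu via the indexed cpustat API */
    static u64 example_user_time(int cpu)
    {
            return kcpustat_cpu(cpu).cpustat[CPUTIME_USER] +
                   kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];
    }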
diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
index b9acaaa..c23c390 100644
 
 /* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */
 
-typedef unsigned long long cputime_t;
-typedef unsigned long long cputime64_t;
+typedef unsigned long long __nocast cputime_t;
+typedef unsigned long long __nocast cputime64_t;
 
-#ifndef __s390x__
-
-static inline unsigned int
-__div(unsigned long long n, unsigned int base)
+static inline unsigned long __div(unsigned long long n, unsigned long base)
 {
+#ifndef __s390x__
        register_pair rp;
 
        rp.pair = n >> 1;
        asm ("dr %0,%1" : "+d" (rp) : "d" (base >> 1));
        return rp.subreg.odd;
+#else /* __s390x__ */
+       return n / base;
+#endif /* __s390x__ */
 }
 
-#else /* __s390x__ */
+#define cputime_one_jiffy              jiffies_to_cputime(1)
 
-static inline unsigned int
-__div(unsigned long long n, unsigned int base)
+/*
+ * Convert cputime to jiffies and back.
+ */
+static inline unsigned long cputime_to_jiffies(const cputime_t cputime)
 {
-       return n / base;
+       return __div((__force unsigned long long) cputime, 4096000000ULL / HZ);
 }
 
-#endif /* __s390x__ */
+static inline cputime_t jiffies_to_cputime(const unsigned int jif)
+{
+       return (__force cputime_t)(jif * (4096000000ULL / HZ));
+}
 
-#define cputime_zero                   (0ULL)
-#define cputime_one_jiffy              jiffies_to_cputime(1)
-#define cputime_max                    ((~0UL >> 1) - 1)
-#define cputime_add(__a, __b)          ((__a) +  (__b))
-#define cputime_sub(__a, __b)          ((__a) -  (__b))
-#define cputime_div(__a, __n) ({               \
-       unsigned long long __div = (__a);       \
-       do_div(__div,__n);                      \
-       __div;                                  \
-})
-#define cputime_halve(__a)             ((__a) >> 1)
-#define cputime_eq(__a, __b)           ((__a) == (__b))
-#define cputime_gt(__a, __b)           ((__a) >  (__b))
-#define cputime_ge(__a, __b)           ((__a) >= (__b))
-#define cputime_lt(__a, __b)           ((__a) <  (__b))
-#define cputime_le(__a, __b)           ((__a) <= (__b))
-#define cputime_to_jiffies(__ct)       (__div((__ct), 4096000000ULL / HZ))
-#define cputime_to_scaled(__ct)                (__ct)
-#define jiffies_to_cputime(__hz)       ((cputime_t)(__hz) * (4096000000ULL / HZ))
-
-#define cputime64_zero                 (0ULL)
-#define cputime64_add(__a, __b)                ((__a) + (__b))
-#define cputime_to_cputime64(__ct)     (__ct)
-
-static inline u64
-cputime64_to_jiffies64(cputime64_t cputime)
-{
-       do_div(cputime, 4096000000ULL / HZ);
-       return cputime;
+static inline u64 cputime64_to_jiffies64(cputime64_t cputime)
+{
+       unsigned long long jif = (__force unsigned long long) cputime;
+       do_div(jif, 4096000000ULL / HZ);
+       return jif;
+}
+
+static inline cputime64_t jiffies64_to_cputime64(const u64 jif)
+{
+       return (__force cputime64_t)(jif * (4096000000ULL / HZ));
 }
 
 /*
  * Convert cputime to microseconds and back.
  */
-static inline unsigned int
-cputime_to_usecs(const cputime_t cputime)
+static inline unsigned int cputime_to_usecs(const cputime_t cputime)
 {
-       return cputime_div(cputime, 4096);
+       return (__force unsigned long long) cputime >> 12;
 }
 
-static inline cputime_t
-usecs_to_cputime(const unsigned int m)
+static inline cputime_t usecs_to_cputime(const unsigned int m)
 {
-       return (cputime_t) m * 4096;
+       return (__force cputime_t)(m * 4096ULL);
 }
 
 #define usecs_to_cputime64(m)          usecs_to_cputime(m)
@@ -92,40 +77,39 @@ usecs_to_cputime(const unsigned int m)
 /*
  * Convert cputime to milliseconds and back.
  */
-static inline unsigned int
-cputime_to_secs(const cputime_t cputime)
+static inline unsigned int cputime_to_secs(const cputime_t cputime)
 {
-       return __div(cputime, 2048000000) >> 1;
+       return __div((__force unsigned long long) cputime, 2048000000) >> 1;
 }
 
-static inline cputime_t
-secs_to_cputime(const unsigned int s)
+static inline cputime_t secs_to_cputime(const unsigned int s)
 {
-       return (cputime_t) s * 4096000000ULL;
+       return (__force cputime_t)(s * 4096000000ULL);
 }
 
 /*
  * Convert cputime to timespec and back.
  */
-static inline cputime_t
-timespec_to_cputime(const struct timespec *value)
+static inline cputime_t timespec_to_cputime(const struct timespec *value)
 {
-       return value->tv_nsec * 4096 / 1000 + (u64) value->tv_sec * 4096000000ULL;
+       unsigned long long ret = value->tv_sec * 4096000000ULL;
+       return (__force cputime_t)(ret + value->tv_nsec * 4096 / 1000);
 }
 
-static inline void
-cputime_to_timespec(const cputime_t cputime, struct timespec *value)
+static inline void cputime_to_timespec(const cputime_t cputime,
+                                      struct timespec *value)
 {
+       unsigned long long __cputime = (__force unsigned long long) cputime;
 #ifndef __s390x__
        register_pair rp;
 
-       rp.pair = cputime >> 1;
+       rp.pair = __cputime >> 1;
        asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL));
        value->tv_nsec = rp.subreg.even * 1000 / 4096;
        value->tv_sec = rp.subreg.odd;
 #else
-       value->tv_nsec = (cputime % 4096000000ULL) * 1000 / 4096;
-       value->tv_sec = cputime / 4096000000ULL;
+       value->tv_nsec = (__cputime % 4096000000ULL) * 1000 / 4096;
+       value->tv_sec = __cputime / 4096000000ULL;
 #endif
 }
 
@@ -134,50 +118,52 @@ cputime_to_timespec(const cputime_t cputime, struct timespec *value)
  * Since cputime and timeval have the same resolution (microseconds)
  * this is easy.
  */
-static inline cputime_t
-timeval_to_cputime(const struct timeval *value)
+static inline cputime_t timeval_to_cputime(const struct timeval *value)
 {
-       return value->tv_usec * 4096 + (u64) value->tv_sec * 4096000000ULL;
+       unsigned long long ret = value->tv_sec * 4096000000ULL;
+       return (__force cputime_t)(ret + value->tv_usec * 4096ULL);
 }
 
-static inline void
-cputime_to_timeval(const cputime_t cputime, struct timeval *value)
+static inline void cputime_to_timeval(const cputime_t cputime,
+                                     struct timeval *value)
 {
+       unsigned long long __cputime = (__force unsigned long long) cputime;
 #ifndef __s390x__
        register_pair rp;
 
-       rp.pair = cputime >> 1;
+       rp.pair = __cputime >> 1;
        asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL));
        value->tv_usec = rp.subreg.even / 4096;
        value->tv_sec = rp.subreg.odd;
 #else
-       value->tv_usec = (cputime % 4096000000ULL) / 4096;
-       value->tv_sec = cputime / 4096000000ULL;
+       value->tv_usec = (__cputime % 4096000000ULL) / 4096;
+       value->tv_sec = __cputime / 4096000000ULL;
 #endif
 }
 
 /*
  * Convert cputime to clock and back.
  */
-static inline clock_t
-cputime_to_clock_t(cputime_t cputime)
+static inline clock_t cputime_to_clock_t(cputime_t cputime)
 {
-       return cputime_div(cputime, 4096000000ULL / USER_HZ);
+       unsigned long long clock = (__force unsigned long long) cputime;
+       do_div(clock, 4096000000ULL / USER_HZ);
+       return clock;
 }
 
-static inline cputime_t
-clock_t_to_cputime(unsigned long x)
+static inline cputime_t clock_t_to_cputime(unsigned long x)
 {
-       return (cputime_t) x * (4096000000ULL / USER_HZ);
+       return (__force cputime_t)(x * (4096000000ULL / USER_HZ));
 }
 
 /*
  * Convert cputime64 to clock.
  */
-static inline clock_t
-cputime64_to_clock_t(cputime64_t cputime)
+static inline clock_t cputime64_to_clock_t(cputime64_t cputime)
 {
-       return cputime_div(cputime, 4096000000ULL / USER_HZ);
+       unsigned long long clock = (__force unsigned long long) cputime;
+       do_div(clock, 4096000000ULL / USER_HZ);
+       return clock;
 }
 
 struct s390_idle_data {
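For reference, s390 cputime_t counts TOD-derived units of 1/4096 microsecond
(4096000000 per second), which is why every conversion above multiplies or
divides by 4096-based constants and why cputime_to_usecs() reduces to a shift
by 12. A minimal user-space sketch of the same arithmetic (plain C,
illustrative only, not kernel code):

    #include <stdio.h>

    int main(void)
    {
            /* on s390, 4096 cputime units == 1 microsecond */
            unsigned long long cputime = 3 * 4096000000ULL;   /* 3 seconds */

            printf("usecs: %llu\n", cputime >> 12);           /* 3000000 */
            printf("secs:  %llu\n", cputime / 4096000000ULL); /* 3 */
            return 0;
    }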
index e54c4ff..f11d1b0 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/kernel.h>
+#include <linux/memblock.h>
 #include <linux/mm.h>
 #include <linux/stddef.h>
 #include <linux/unistd.h>
@@ -820,7 +821,8 @@ setup_memory(void)
                end_chunk = min(end_chunk, end_pfn);
                if (start_chunk >= end_chunk)
                        continue;
-               add_active_range(0, start_chunk, end_chunk);
+               memblock_add_node(PFN_PHYS(start_chunk),
+                                 PFN_PHYS(end_chunk - start_chunk), 0);
                pfn = max(start_chunk, start_pfn);
                for (; pfn < end_chunk; pfn++)
                        page_set_storage_key(PFN_PHYS(pfn),
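The hunk above shows the pattern that recurs throughout this merge:
add_active_range(nid, start_pfn, end_pfn) calls become memblock_add_node() or
memblock_set_node() calls. Since memblock works on physical byte addresses
rather than page frame numbers, each converted site wraps its PFNs with
PFN_PHYS(); a sketch of the translation (the pfn names are placeholders):

    /*
     * old: add_active_range(nid, start_pfn, end_pfn);
     * new: register [start_pfn, end_pfn) with memblock for node nid.
     * PFN_PHYS(x) is x << PAGE_SHIFT, so the second argument is a byte size.
     */
    memblock_add_node(PFN_PHYS(start_pfn),
                      PFN_PHYS(end_pfn - start_pfn), nid);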
index f43c0e4..9daee91 100644 (file)
@@ -22,6 +22,7 @@
 #include <asm/irq.h>
 
 #include "hwsampler.h"
+#include "op_counter.h"
 
 #define MAX_NUM_SDB 511
 #define MIN_NUM_SDB 1
@@ -896,6 +897,8 @@ static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt,
                if (sample_data_ptr->P == 1) {
                        /* userspace sample */
                        unsigned int pid = sample_data_ptr->prim_asn;
+                       if (!counter_config.user)
+                               goto skip_sample;
                        rcu_read_lock();
                        tsk = pid_task(find_vpid(pid), PIDTYPE_PID);
                        if (tsk)
@@ -903,6 +906,8 @@ static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt,
                        rcu_read_unlock();
                } else {
                        /* kernelspace sample */
+                       if (!counter_config.kernel)
+                               goto skip_sample;
                        regs = task_pt_regs(current);
                }
 
@@ -910,7 +915,7 @@ static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt,
                oprofile_add_ext_hw_sample(sample_data_ptr->ia, regs, 0,
                                !sample_data_ptr->P, tsk);
                mutex_unlock(&hws_sem);
-
+       skip_sample:
                sample_data_ptr++;
        }
 }
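Together with op_counter.h introduced below, these hunks let user space
suppress user-space or kernel-space samples individually. Note that filtered
samples still reach skip_sample and advance sample_data_ptr, so the walk over
the sample data blocks stays in sync.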
index bd58b72..2297be4 100644 (file)
@@ -2,10 +2,11 @@
  * arch/s390/oprofile/init.c
  *
  * S390 Version
- *   Copyright (C) 2003 IBM Deutschland Entwicklung GmbH, IBM Corporation
+ *   Copyright (C) 2002-2011 IBM Deutschland Entwicklung GmbH, IBM Corporation
  *   Author(s): Thomas Spatzier (tspat@de.ibm.com)
  *   Author(s): Mahesh Salgaonkar (mahesh@linux.vnet.ibm.com)
  *   Author(s): Heinz Graalfs (graalfs@linux.vnet.ibm.com)
+ *   Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com)
  *
  * @remark Copyright 2002-2011 OProfile authors
  */
@@ -14,6 +15,8 @@
 #include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
+#include <linux/module.h>
+#include <asm/processor.h>
 
 #include "../../../drivers/oprofile/oprof.h"
 
@@ -22,6 +25,7 @@ extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth);
 #ifdef CONFIG_64BIT
 
 #include "hwsampler.h"
+#include "op_counter.h"
 
 #define DEFAULT_INTERVAL       4127518
 
@@ -35,16 +39,41 @@ static unsigned long oprofile_max_interval;
 static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS;
 static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS;
 
-static int hwsampler_file;
+static int hwsampler_enabled;
 static int hwsampler_running;  /* start_mutex must be held to change */
+static int hwsampler_available;
 
 static struct oprofile_operations timer_ops;
 
+struct op_counter_config counter_config;
+
+enum __force_cpu_type {
+       reserved = 0,           /* do not force */
+       timer,
+};
+static int force_cpu_type;
+
+static int set_cpu_type(const char *str, struct kernel_param *kp)
+{
+       if (!strcmp(str, "timer")) {
+               force_cpu_type = timer;
+               printk(KERN_INFO "oprofile: forcing timer to be returned "
+                                "as cpu type\n");
+       } else {
+               force_cpu_type = 0;
+       }
+
+       return 0;
+}
+module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0);
+MODULE_PARM_DESC(cpu_type, "Force legacy basic mode sampling "
+                          "(report cpu_type \"timer\")");
+
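With this parameter, legacy tooling can request the old behaviour by loading
the module with cpu_type=timer or, when oprofile is built in, by booting with
oprofile.cpu_type=timer on the kernel command line.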
 static int oprofile_hwsampler_start(void)
 {
        int retval;
 
-       hwsampler_running = hwsampler_file;
+       hwsampler_running = hwsampler_enabled;
 
        if (!hwsampler_running)
                return timer_ops.start();
@@ -72,10 +101,16 @@ static void oprofile_hwsampler_stop(void)
        return;
 }
 
+/*
+ * File ops used for:
+ * /dev/oprofile/0/enabled
+ * /dev/oprofile/hwsampling/hwsampler  (cpu_type = timer)
+ */
+
 static ssize_t hwsampler_read(struct file *file, char __user *buf,
                size_t count, loff_t *offset)
 {
-       return oprofilefs_ulong_to_user(hwsampler_file, buf, count, offset);
+       return oprofilefs_ulong_to_user(hwsampler_enabled, buf, count, offset);
 }
 
 static ssize_t hwsampler_write(struct file *file, char const __user *buf,
@@ -91,6 +126,9 @@ static ssize_t hwsampler_write(struct file *file, char const __user *buf,
        if (retval <= 0)
                return retval;
 
+       if (val != 0 && val != 1)
+               return -EINVAL;
+
        if (oprofile_started)
                /*
                 * safe to do without locking as we set
@@ -99,7 +137,7 @@ static ssize_t hwsampler_write(struct file *file, char const __user *buf,
                 */
                return -EBUSY;
 
-       hwsampler_file = val;
+       hwsampler_enabled = val;
 
        return count;
 }
@@ -109,38 +147,311 @@ static const struct file_operations hwsampler_fops = {
        .write          = hwsampler_write,
 };
 
+/*
+ * File ops used for:
+ * /dev/oprofile/0/count
+ * /dev/oprofile/hwsampling/hw_interval  (cpu_type = timer)
+ *
+ * Make sure that the value is within the hardware range.
+ */
+
+static ssize_t hw_interval_read(struct file *file, char __user *buf,
+                               size_t count, loff_t *offset)
+{
+       return oprofilefs_ulong_to_user(oprofile_hw_interval, buf,
+                                       count, offset);
+}
+
+static ssize_t hw_interval_write(struct file *file, char const __user *buf,
+                                size_t count, loff_t *offset)
+{
+       unsigned long val;
+       int retval;
+
+       if (*offset)
+               return -EINVAL;
+       retval = oprofilefs_ulong_from_user(&val, buf, count);
+       if (retval)
+               return retval;
+       if (val < oprofile_min_interval)
+               oprofile_hw_interval = oprofile_min_interval;
+       else if (val > oprofile_max_interval)
+               oprofile_hw_interval = oprofile_max_interval;
+       else
+               oprofile_hw_interval = val;
+
+       return count;
+}
+
+static const struct file_operations hw_interval_fops = {
+       .read           = hw_interval_read,
+       .write          = hw_interval_write,
+};
+
+/*
+ * File ops used for:
+ * /dev/oprofile/0/event
+ * Only a single event with number 0 is supported with this counter.
+ *
+ * /dev/oprofile/0/unit_mask
+ * This is a dummy file needed by the user space tools.
+ * No value other than 0 is accepted or returned.
+ */
+
+static ssize_t hwsampler_zero_read(struct file *file, char __user *buf,
+                                   size_t count, loff_t *offset)
+{
+       return oprofilefs_ulong_to_user(0, buf, count, offset);
+}
+
+static ssize_t hwsampler_zero_write(struct file *file, char const __user *buf,
+                                    size_t count, loff_t *offset)
+{
+       unsigned long val;
+       int retval;
+
+       if (*offset)
+               return -EINVAL;
+
+       retval = oprofilefs_ulong_from_user(&val, buf, count);
+       if (retval)
+               return retval;
+       if (val != 0)
+               return -EINVAL;
+       return count;
+}
+
+static const struct file_operations zero_fops = {
+       .read           = hwsampler_zero_read,
+       .write          = hwsampler_zero_write,
+};
+
+/* /dev/oprofile/0/kernel file ops.  */
+
+static ssize_t hwsampler_kernel_read(struct file *file, char __user *buf,
+                                    size_t count, loff_t *offset)
+{
+       return oprofilefs_ulong_to_user(counter_config.kernel,
+                                       buf, count, offset);
+}
+
+static ssize_t hwsampler_kernel_write(struct file *file, char const __user *buf,
+                                     size_t count, loff_t *offset)
+{
+       unsigned long val;
+       int retval;
+
+       if (*offset)
+               return -EINVAL;
+
+       retval = oprofilefs_ulong_from_user(&val, buf, count);
+       if (retval)
+               return retval;
+
+       if (val != 0 && val != 1)
+               return -EINVAL;
+
+       counter_config.kernel = val;
+
+       return count;
+}
+
+static const struct file_operations kernel_fops = {
+       .read           = hwsampler_kernel_read,
+       .write          = hwsampler_kernel_write,
+};
+
+/* /dev/oprofile/0/user file ops. */
+
+static ssize_t hwsampler_user_read(struct file *file, char __user *buf,
+                                  size_t count, loff_t *offset)
+{
+       return oprofilefs_ulong_to_user(counter_config.user,
+                                       buf, count, offset);
+}
+
+static ssize_t hwsampler_user_write(struct file *file, char const __user *buf,
+                                   size_t count, loff_t *offset)
+{
+       unsigned long val;
+       int retval;
+
+       if (*offset)
+               return -EINVAL;
+
+       retval = oprofilefs_ulong_from_user(&val, buf, count);
+       if (retval)
+               return retval;
+
+       if (val != 0 && val != 1)
+               return -EINVAL;
+
+       counter_config.user = val;
+
+       return count;
+}
+
+static const struct file_operations user_fops = {
+       .read           = hwsampler_user_read,
+       .write          = hwsampler_user_write,
+};
+
+
+/*
+ * File ops used for: /dev/oprofile/timer/enabled
+ * The value is always the logical inverse of hwsampler_enabled, so no
+ * separate variable is created and no locking is needed.
+ */
+
+static ssize_t timer_enabled_read(struct file *file, char __user *buf,
+                                 size_t count, loff_t *offset)
+{
+       return oprofilefs_ulong_to_user(!hwsampler_enabled, buf, count, offset);
+}
+
+static ssize_t timer_enabled_write(struct file *file, char const __user *buf,
+                                  size_t count, loff_t *offset)
+{
+       unsigned long val;
+       int retval;
+
+       if (*offset)
+               return -EINVAL;
+
+       retval = oprofilefs_ulong_from_user(&val, buf, count);
+       if (retval)
+               return retval;
+
+       if (val != 0 && val != 1)
+               return -EINVAL;
+
+       /* Timer cannot be disabled without having hardware sampling.  */
+       if (val == 0 && !hwsampler_available)
+               return -EINVAL;
+
+       if (oprofile_started)
+               /*
+                * safe to do without locking as we set
+                * hwsampler_running in start() when start_mutex is
+                * held
+                */
+               return -EBUSY;
+
+       hwsampler_enabled = !val;
+
+       return count;
+}
+
+static const struct file_operations timer_enabled_fops = {
+       .read           = timer_enabled_read,
+       .write          = timer_enabled_write,
+};
+
+
 static int oprofile_create_hwsampling_files(struct super_block *sb,
-                                               struct dentry *root)
+                                           struct dentry *root)
 {
-       struct dentry *hw_dir;
+       struct dentry *dir;
+
+       dir = oprofilefs_mkdir(sb, root, "timer");
+       if (!dir)
+               return -EINVAL;
+
+       oprofilefs_create_file(sb, dir, "enabled", &timer_enabled_fops);
+
+       if (!hwsampler_available)
+               return 0;
 
        /* reinitialize default values */
-       hwsampler_file = 1;
+       hwsampler_enabled = 1;
+       counter_config.kernel = 1;
+       counter_config.user = 1;
 
-       hw_dir = oprofilefs_mkdir(sb, root, "hwsampling");
-       if (!hw_dir)
-               return -EINVAL;
+       if (!force_cpu_type) {
+               /*
+                * Create the counter file system.  A single virtual
+                * counter is created which can be used to
+                * enable/disable hardware sampling dynamically from
+                * user space.  The user space will configure a single
+                * counter with a single event.  The values of 'event'
+                * and 'unit_mask' are not evaluated by the kernel code
+                * and can only be set to 0.
+                */
+
+               dir = oprofilefs_mkdir(sb, root, "0");
+               if (!dir)
+                       return -EINVAL;
 
-       oprofilefs_create_file(sb, hw_dir, "hwsampler", &hwsampler_fops);
-       oprofilefs_create_ulong(sb, hw_dir, "hw_interval",
-                               &oprofile_hw_interval);
-       oprofilefs_create_ro_ulong(sb, hw_dir, "hw_min_interval",
-                               &oprofile_min_interval);
-       oprofilefs_create_ro_ulong(sb, hw_dir, "hw_max_interval",
-                               &oprofile_max_interval);
-       oprofilefs_create_ulong(sb, hw_dir, "hw_sdbt_blocks",
-                               &oprofile_sdbt_blocks);
+               oprofilefs_create_file(sb, dir, "enabled", &hwsampler_fops);
+               oprofilefs_create_file(sb, dir, "event", &zero_fops);
+               oprofilefs_create_file(sb, dir, "count", &hw_interval_fops);
+               oprofilefs_create_file(sb, dir, "unit_mask", &zero_fops);
+               oprofilefs_create_file(sb, dir, "kernel", &kernel_fops);
+               oprofilefs_create_file(sb, dir, "user", &user_fops);
+               oprofilefs_create_ulong(sb, dir, "hw_sdbt_blocks",
+                                       &oprofile_sdbt_blocks);
 
+       } else {
+               /*
+                * Hardware sampling can be used but the cpu_type is
+                * forced to timer in order to deal with legacy user
+                * space tools.  The /dev/oprofile/hwsampling fs is
+                * provided in that case.
+                */
+               dir = oprofilefs_mkdir(sb, root, "hwsampling");
+               if (!dir)
+                       return -EINVAL;
+
+               oprofilefs_create_file(sb, dir, "hwsampler",
+                                      &hwsampler_fops);
+               oprofilefs_create_file(sb, dir, "hw_interval",
+                                      &hw_interval_fops);
+               oprofilefs_create_ro_ulong(sb, dir, "hw_min_interval",
+                                          &oprofile_min_interval);
+               oprofilefs_create_ro_ulong(sb, dir, "hw_max_interval",
+                                          &oprofile_max_interval);
+               oprofilefs_create_ulong(sb, dir, "hw_sdbt_blocks",
+                                       &oprofile_sdbt_blocks);
+       }
        return 0;
 }
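For orientation, the oprofilefs layout this function produces (assuming
oprofilefs is mounted at /dev/oprofile, as the comments above assume):

    /dev/oprofile/timer/enabled      always created
    /dev/oprofile/0/                 hwsampler available, cpu_type not forced:
        enabled, event, count, unit_mask, kernel, user, hw_sdbt_blocks
    /dev/oprofile/hwsampling/        hwsampler available, cpu_type forced to timer:
        hwsampler, hw_interval, hw_min_interval, hw_max_interval, hw_sdbt_blocks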
 
 static int oprofile_hwsampler_init(struct oprofile_operations *ops)
 {
+       /*
+        * Initialize the timer mode infrastructure as well in order
+        * to be able to switch back dynamically.  oprofile_timer_init
+        * is not supposed to fail.
+        */
+       if (oprofile_timer_init(ops))
+               BUG();
+
+       memcpy(&timer_ops, ops, sizeof(timer_ops));
+       ops->create_files = oprofile_create_hwsampling_files;
+
+       /*
+        * If the user space tools do not support newer cpu types, the
+        * force_cpu_type module parameter can be used to always return
+        * "timer" as cpu type.
+        */
+       if (force_cpu_type != timer) {
+               struct cpuid id;
+
+       get_cpu_id(&id);
+
+               switch (id.machine) {
+               case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break;
+               case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break;
+               default: return -ENODEV;
+               }
+       }
+
        if (hwsampler_setup())
                return -ENODEV;
 
        /*
-        * create hwsampler files only if hwsampler_setup() succeeds.
+        * Query the range for the sampling interval from the
+        * hardware.
         */
        oprofile_min_interval = hwsampler_query_min_interval();
        if (oprofile_min_interval == 0)
@@ -155,23 +466,17 @@ static int oprofile_hwsampler_init(struct oprofile_operations *ops)
        if (oprofile_hw_interval > oprofile_max_interval)
                oprofile_hw_interval = oprofile_max_interval;
 
-       if (oprofile_timer_init(ops))
-               return -ENODEV;
-
-       printk(KERN_INFO "oprofile: using hardware sampling\n");
-
-       memcpy(&timer_ops, ops, sizeof(timer_ops));
+       printk(KERN_INFO "oprofile: System z hardware sampling "
+              "facility found.\n");
 
        ops->start = oprofile_hwsampler_start;
        ops->stop = oprofile_hwsampler_stop;
-       ops->create_files = oprofile_create_hwsampling_files;
 
        return 0;
 }
 
 static void oprofile_hwsampler_exit(void)
 {
-       oprofile_timer_exit();
        hwsampler_shutdown();
 }
 
@@ -182,7 +487,15 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
        ops->backtrace = s390_backtrace;
 
 #ifdef CONFIG_64BIT
-       return oprofile_hwsampler_init(ops);
+
+       /*
+        * -ENODEV is not reported to the caller.  The module itself
+        * will use the timer mode sampling as fallback and this is
+        * always available.
+        */
+       hwsampler_available = oprofile_hwsampler_init(ops) == 0;
+
+       return 0;
 #else
        return -ENODEV;
 #endif
diff --git a/arch/s390/oprofile/op_counter.h b/arch/s390/oprofile/op_counter.h
new file mode 100644 (file)
index 0000000..1a8d3ca
--- /dev/null
@@ -0,0 +1,23 @@
+/**
+ * arch/s390/oprofile/op_counter.h
+ *
+ *   Copyright (C) 2011 IBM Deutschland Entwicklung GmbH, IBM Corporation
+ *   Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com)
+ *
+ * @remark Copyright 2011 OProfile authors
+ */
+
+#ifndef OP_COUNTER_H
+#define OP_COUNTER_H
+
+struct op_counter_config {
+       /* `enabled' maps to the hwsampler_enabled variable.  */
+       /* `count' maps to the oprofile_hw_interval variable.  */
+       /* `event' and `unit_mask' are unused. */
+       unsigned long kernel;
+       unsigned long user;
+};
+
+extern struct op_counter_config counter_config;
+
+#endif /* OP_COUNTER_H */
index df169e8..8b0c946 100644 (file)
@@ -4,6 +4,9 @@ config SCORE
        def_bool y
        select HAVE_GENERIC_HARDIRQS
        select GENERIC_IRQ_SHOW
+       select HAVE_MEMBLOCK
+       select HAVE_MEMBLOCK_NODE_MAP
+       select ARCH_DISCARD_MEMBLOCK
 
 choice
        prompt "System type"
@@ -60,9 +63,6 @@ config 32BIT
 config ARCH_FLATMEM_ENABLE
        def_bool y
 
-config ARCH_POPULATES_NODE_MAP
-       def_bool y
-
 source "mm/Kconfig"
 
 config MEMORY_START
index 6f898c0..b48459a 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/bootmem.h>
 #include <linux/initrd.h>
 #include <linux/ioport.h>
+#include <linux/memblock.h>
 #include <linux/mm.h>
 #include <linux/seq_file.h>
 #include <linux/screen_info.h>
@@ -54,7 +55,8 @@ static void __init bootmem_init(void)
        /* Initialize the boot-time allocator with low memory only. */
        bootmap_size = init_bootmem_node(NODE_DATA(0), start_pfn,
                                         min_low_pfn, max_low_pfn);
-       add_active_range(0, min_low_pfn, max_low_pfn);
+       memblock_add_node(PFN_PHYS(min_low_pfn),
+                         PFN_PHYS(max_low_pfn - min_low_pfn), 0);
 
        free_bootmem(PFN_PHYS(start_pfn),
                     (max_low_pfn - start_pfn) << PAGE_SHIFT);
index 5629e20..47a2f1c 100644 (file)
@@ -4,6 +4,7 @@ config SUPERH
        select CLKDEV_LOOKUP
        select HAVE_IDE if HAS_IOPORT
        select HAVE_MEMBLOCK
+       select HAVE_MEMBLOCK_NODE_MAP
        select HAVE_OPROFILE
        select HAVE_GENERIC_DMA_COHERENT
        select HAVE_ARCH_TRACEHOOK
diff --git a/arch/sh/include/asm/memblock.h b/arch/sh/include/asm/memblock.h
deleted file mode 100644 (file)
index e87063f..0000000
+++ /dev/null
@@ -1,4 +0,0 @@
-#ifndef __ASM_SH_MEMBLOCK_H
-#define __ASM_SH_MEMBLOCK_H
-
-#endif /* __ASM_SH_MEMBLOCK_H */
index c5a33f0..9fea49f 100644 (file)
@@ -157,9 +157,6 @@ void __init reserve_crashkernel(void)
        unsigned long long crash_size, crash_base;
        int ret;
 
-       /* this is necessary because of memblock_phys_mem_size() */
-       memblock_analyze();
-
        ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
                        &crash_size, &crash_base);
        if (ret == 0 && crash_size > 0) {
index 1a0e946..7b57bf1 100644 (file)
@@ -230,7 +230,8 @@ void __init __add_active_range(unsigned int nid, unsigned long start_pfn,
        pmb_bolt_mapping((unsigned long)__va(start), start, end - start,
                         PAGE_KERNEL);
 
-       add_active_range(nid, start_pfn, end_pfn);
+       memblock_set_node(PFN_PHYS(start_pfn),
+                         PFN_PHYS(end_pfn - start_pfn), nid);
 }
 
 void __init __weak plat_early_device_setup(void)
index c3e61b3..cb8f992 100644 (file)
@@ -143,9 +143,6 @@ config MAX_ACTIVE_REGIONS
                       CPU_SUBTYPE_SH7785)
        default "1"
 
-config ARCH_POPULATES_NODE_MAP
-       def_bool y
-
 config ARCH_SELECT_MEMORY_MODEL
        def_bool y
 
index 939ca0f..82cc576 100644 (file)
@@ -324,7 +324,6 @@ void __init paging_init(void)
        unsigned long vaddr, end;
        int nid;
 
-       memblock_init();
        sh_mv.mv_mem_init();
 
        early_reserve_mem();
@@ -337,7 +336,7 @@ void __init paging_init(void)
                sh_mv.mv_mem_reserve();
 
        memblock_enforce_memory_limit(memory_limit);
-       memblock_analyze();
+       memblock_allow_resize();
 
        memblock_dump_all();
 
index f92602e..70ae9d8 100644 (file)
@@ -43,6 +43,7 @@ config SPARC64
        select HAVE_KPROBES
        select HAVE_RCU_TABLE_FREE if SMP
        select HAVE_MEMBLOCK
+       select HAVE_MEMBLOCK_NODE_MAP
        select HAVE_SYSCALL_WRAPPERS
        select HAVE_DYNAMIC_FTRACE
        select HAVE_FTRACE_MCOUNT_RECORD
@@ -352,9 +353,6 @@ config NODES_SPAN_OTHER_NODES
        def_bool y
        depends on NEED_MULTIPLE_NODES
 
-config ARCH_POPULATES_NODE_MAP
-       def_bool y if SPARC64
-
 config ARCH_SELECT_MEMORY_MODEL
        def_bool y if SPARC64
 
diff --git a/arch/sparc/include/asm/memblock.h b/arch/sparc/include/asm/memblock.h
deleted file mode 100644 (file)
index c67b047..0000000
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef _SPARC64_MEMBLOCK_H
-#define _SPARC64_MEMBLOCK_H
-
-#include <asm/oplib.h>
-
-#define MEMBLOCK_DBG(fmt...) prom_printf(fmt)
-
-#endif /* !(_SPARC64_MEMBLOCK_H) */
index 8e073d8..b3f5e7d 100644 (file)
@@ -790,7 +790,7 @@ static int find_node(unsigned long addr)
        return -1;
 }
 
-u64 memblock_nid_range(u64 start, u64 end, int *nid)
+static u64 memblock_nid_range(u64 start, u64 end, int *nid)
 {
        *nid = find_node(start);
        start += PAGE_SIZE;
@@ -808,7 +808,7 @@ u64 memblock_nid_range(u64 start, u64 end, int *nid)
        return start;
 }
 #else
-u64 memblock_nid_range(u64 start, u64 end, int *nid)
+static u64 memblock_nid_range(u64 start, u64 end, int *nid)
 {
        *nid = 0;
        return end;
@@ -816,7 +816,7 @@ u64 memblock_nid_range(u64 start, u64 end, int *nid)
 #endif
 
 /* This must be invoked after performing all of the necessary
- * add_active_range() calls for 'nid'.  We need to be able to get
+ * memblock_set_node() calls for 'nid'.  We need to be able to get
  * correct data from get_pfn_range_for_nid().
  */
 static void __init allocate_node_data(int nid)
@@ -987,14 +987,11 @@ static void __init add_node_ranges(void)
 
                        this_end = memblock_nid_range(start, end, &nid);
 
-                       numadbg("Adding active range nid[%d] "
+                       numadbg("Setting memblock NUMA node nid[%d] "
                                "start[%lx] end[%lx]\n",
                                nid, start, this_end);
 
-                       add_active_range(nid,
-                                        start >> PAGE_SHIFT,
-                                        this_end >> PAGE_SHIFT);
-
+                       memblock_set_node(start, this_end - start, nid);
                        start = this_end;
                }
        }
@@ -1282,7 +1279,6 @@ static void __init bootmem_init_nonnuma(void)
 {
        unsigned long top_of_ram = memblock_end_of_DRAM();
        unsigned long total_ram = memblock_phys_mem_size();
-       struct memblock_region *reg;
 
        numadbg("bootmem_init_nonnuma()\n");
 
@@ -1292,20 +1288,8 @@ static void __init bootmem_init_nonnuma(void)
               (top_of_ram - total_ram) >> 20);
 
        init_node_masks_nonnuma();
-
-       for_each_memblock(memory, reg) {
-               unsigned long start_pfn, end_pfn;
-
-               if (!reg->size)
-                       continue;
-
-               start_pfn = memblock_region_memory_base_pfn(reg);
-               end_pfn = memblock_region_memory_end_pfn(reg);
-               add_active_range(0, start_pfn, end_pfn);
-       }
-
+       memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0);
        allocate_node_data(0);
-
        node_set_online(0);
 }
 
@@ -1769,8 +1753,6 @@ void __init paging_init(void)
                sun4v_ktsb_init();
        }
 
-       memblock_init();
-
        /* Find available physical memory...
         *
         * Read it twice in order to work around a bug in openfirmware.
@@ -1796,7 +1778,7 @@ void __init paging_init(void)
 
        memblock_enforce_memory_limit(cmdline_memory_size);
 
-       memblock_analyze();
+       memblock_allow_resize();
        memblock_dump_all();
 
        set_bit(0, mmu_context_bmap);
index a08d9fa..82a6e22 100644 (file)
@@ -75,8 +75,6 @@ static struct clocksource itimer_clocksource = {
        .rating         = 300,
        .read           = itimer_read,
        .mask           = CLOCKSOURCE_MASK(64),
-       .mult           = 1000,
-       .shift          = 0,
        .flags          = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
@@ -94,9 +92,9 @@ static void __init setup_itimer(void)
                clockevent_delta2ns(60 * HZ, &itimer_clockevent);
        itimer_clockevent.min_delta_ns =
                clockevent_delta2ns(1, &itimer_clockevent);
-       err = clocksource_register(&itimer_clocksource);
+       err = clocksource_register_hz(&itimer_clocksource, USEC_PER_SEC);
        if (err) {
-               printk(KERN_ERR "clocksource_register returned %d\n", err);
+               printk(KERN_ERR "clocksource_register_hz returned %d\n", err);
                return;
        }
        clockevents_register_device(&itimer_clockevent);
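Dropping the hand-coded mult/shift pair is the point of this hunk:
clocksource_register_hz() derives both from the stated frequency, here
USEC_PER_SEC (1 MHz), which matches the old mult=1000/shift=0 (each cycle
counted as 1000 ns).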
index 471b6bc..673d7a8 100644 (file)
@@ -37,6 +37,7 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 #include <asm/traps.h>
+#include <asm/memblock.h>
 
 #include "setup.h"
 
index 3b379cd..de186bd 100644 (file)
@@ -26,6 +26,7 @@
 #include <asm/setup.h>
 #include <asm/sizes.h>
 #include <asm/tlb.h>
+#include <asm/memblock.h>
 #include <mach/map.h>
 
 #include "mm.h"
@@ -245,7 +246,6 @@ void __init uc32_memblock_init(struct meminfo *mi)
        sort(&meminfo.bank, meminfo.nr_banks, sizeof(meminfo.bank[0]),
                meminfo_cmp, NULL);
 
-       memblock_init();
        for (i = 0; i < mi->nr_banks; i++)
                memblock_add(mi->bank[i].start, mi->bank[i].size);
 
@@ -264,7 +264,7 @@ void __init uc32_memblock_init(struct meminfo *mi)
 
        uc32_mm_memblock_reserve();
 
-       memblock_analyze();
+       memblock_allow_resize();
        memblock_dump_all();
 }
 
index 3e5c3e5..43c20b4 100644 (file)
@@ -25,6 +25,7 @@
 #include <asm/setup.h>
 #include <asm/sizes.h>
 #include <asm/tlb.h>
+#include <asm/memblock.h>
 
 #include <mach/map.h>
 
index efb4294..5731eb7 100644 (file)
@@ -26,6 +26,8 @@ config X86
        select HAVE_IOREMAP_PROT
        select HAVE_KPROBES
        select HAVE_MEMBLOCK
+       select HAVE_MEMBLOCK_NODE_MAP
+       select ARCH_DISCARD_MEMBLOCK
        select ARCH_WANT_OPTIONAL_GPIOLIB
        select ARCH_WANT_FRAME_POINTERS
        select HAVE_DMA_ATTRS
@@ -204,9 +206,6 @@ config ZONE_DMA32
        bool
        default X86_64
 
-config ARCH_POPULATES_NODE_MAP
-       def_bool y
-
 config AUDIT_ARCH
        bool
        default X86_64
@@ -343,6 +342,7 @@ config X86_EXTENDED_PLATFORM
 
          If you enable this option then you'll be able to select support
          for the following (non-PC) 64 bit x86 platforms:
+               Numascale NumaChip
                ScaleMP vSMP
                SGI Ultraviolet
 
@@ -351,6 +351,18 @@ config X86_EXTENDED_PLATFORM
 endif
 # This is an alphabetically sorted list of 64 bit extended platforms
 # Please maintain the alphabetic order if and when there are additions
+config X86_NUMACHIP
+       bool "Numascale NumaChip"
+       depends on X86_64
+       depends on X86_EXTENDED_PLATFORM
+       depends on NUMA
+       depends on SMP
+       depends on X86_X2APIC
+       depends on !EDAC_AMD64
+       ---help---
+         Adds support for Numascale NumaChip large-SMP systems. Needed to
+         enable more than ~168 cores.
+         If you don't have one of these, you should say N here.
 
 config X86_VSMP
        bool "ScaleMP vSMP"
index a6253ec..3e27456 100644 (file)
@@ -134,7 +134,7 @@ ENTRY(ia32_sysenter_target)
        CFI_REL_OFFSET rsp,0
        pushfq_cfi
        /*CFI_REL_OFFSET rflags,0*/
-       movl    8*3-THREAD_SIZE+TI_sysenter_return(%rsp), %r10d
+       movl    TI_sysenter_return+THREAD_INFO(%rsp,3*8-KERNEL_STACK_OFFSET),%r10d
        CFI_REGISTER rip,r10
        pushq_cfi $__USER32_CS
        /*CFI_REL_OFFSET cs,0*/
@@ -150,9 +150,8 @@ ENTRY(ia32_sysenter_target)
        .section __ex_table,"a"
        .quad 1b,ia32_badarg
        .previous       
-       GET_THREAD_INFO(%r10)
-       orl    $TS_COMPAT,TI_status(%r10)
-       testl  $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
+       orl     $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+       testl   $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
        CFI_REMEMBER_STATE
        jnz  sysenter_tracesys
        cmpq    $(IA32_NR_syscalls-1),%rax
@@ -162,13 +161,12 @@ sysenter_do_call:
 sysenter_dispatch:
        call    *ia32_sys_call_table(,%rax,8)
        movq    %rax,RAX-ARGOFFSET(%rsp)
-       GET_THREAD_INFO(%r10)
        DISABLE_INTERRUPTS(CLBR_NONE)
        TRACE_IRQS_OFF
-       testl   $_TIF_ALLWORK_MASK,TI_flags(%r10)
+       testl   $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
        jnz     sysexit_audit
 sysexit_from_sys_call:
-       andl    $~TS_COMPAT,TI_status(%r10)
+       andl    $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
        /* clear IF, that popfq doesn't enable interrupts early */
        andl  $~0x200,EFLAGS-R11(%rsp) 
        movl    RIP-R11(%rsp),%edx              /* User %eip */
@@ -205,7 +203,7 @@ sysexit_from_sys_call:
        .endm
 
        .macro auditsys_exit exit
-       testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
+       testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
        jnz ia32_ret_from_sys_call
        TRACE_IRQS_ON
        sti
@@ -215,12 +213,11 @@ sysexit_from_sys_call:
        movzbl %al,%edi         /* zero-extend that into %edi */
        inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
        call audit_syscall_exit
-       GET_THREAD_INFO(%r10)
        movl RAX-ARGOFFSET(%rsp),%eax   /* reload syscall return value */
        movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
        cli
        TRACE_IRQS_OFF
-       testl %edi,TI_flags(%r10)
+       testl %edi,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
        jz \exit
        CLEAR_RREGS -ARGOFFSET
        jmp int_with_check
@@ -238,7 +235,7 @@ sysexit_audit:
 
 sysenter_tracesys:
 #ifdef CONFIG_AUDITSYSCALL
-       testl   $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
+       testl   $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
        jz      sysenter_auditsys
 #endif
        SAVE_REST
@@ -309,9 +306,8 @@ ENTRY(ia32_cstar_target)
        .section __ex_table,"a"
        .quad 1b,ia32_badarg
        .previous       
-       GET_THREAD_INFO(%r10)
-       orl   $TS_COMPAT,TI_status(%r10)
-       testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
+       orl     $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+       testl   $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
        CFI_REMEMBER_STATE
        jnz   cstar_tracesys
        cmpq $IA32_NR_syscalls-1,%rax
@@ -321,13 +317,12 @@ cstar_do_call:
 cstar_dispatch:
        call *ia32_sys_call_table(,%rax,8)
        movq %rax,RAX-ARGOFFSET(%rsp)
-       GET_THREAD_INFO(%r10)
        DISABLE_INTERRUPTS(CLBR_NONE)
        TRACE_IRQS_OFF
-       testl $_TIF_ALLWORK_MASK,TI_flags(%r10)
+       testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
        jnz sysretl_audit
 sysretl_from_sys_call:
-       andl $~TS_COMPAT,TI_status(%r10)
+       andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
        RESTORE_ARGS 0,-ARG_SKIP,0,0,0
        movl RIP-ARGOFFSET(%rsp),%ecx
        CFI_REGISTER rip,rcx
@@ -355,7 +350,7 @@ sysretl_audit:
 
 cstar_tracesys:
 #ifdef CONFIG_AUDITSYSCALL
-       testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
+       testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
        jz cstar_auditsys
 #endif
        xchgl %r9d,%ebp
@@ -420,9 +415,8 @@ ENTRY(ia32_syscall)
        /* note the registers are not zero extended to the sf.
           this could be a problem. */
        SAVE_ARGS 0,1,0
-       GET_THREAD_INFO(%r10)
-       orl   $TS_COMPAT,TI_status(%r10)
-       testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
+       orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+       testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
        jnz ia32_tracesys
        cmpq $(IA32_NR_syscalls-1),%rax
        ja ia32_badsys
@@ -459,8 +453,8 @@ quiet_ni_syscall:
        CFI_ENDPROC
        
        .macro PTREGSCALL label, func, arg
-       .globl \label
-\label:
+       ALIGN
+GLOBAL(\label)
        leaq \func(%rip),%rax
        leaq -ARGOFFSET+8(%rsp),\arg    /* 8 for return address */
        jmp  ia32_ptregs_common 
@@ -477,7 +471,8 @@ quiet_ni_syscall:
        PTREGSCALL stub32_vfork, sys_vfork, %rdi
        PTREGSCALL stub32_iopl, sys_iopl, %rsi
 
-ENTRY(ia32_ptregs_common)
+       ALIGN
+ia32_ptregs_common:
        popq %r11
        CFI_ENDPROC
        CFI_STARTPROC32 simple
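The recurring change in this file: instead of materializing the thread_info
pointer with GET_THREAD_INFO(%r10), fields are addressed directly as
TI_field+THREAD_INFO(%rsp,offset), computing the thread_info location from the
stack pointer and leaving %r10 free across the syscall paths.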
index 091508b..952bd01 100644 (file)
@@ -4,10 +4,10 @@
 
 #ifdef CONFIG_SMP
        .macro LOCK_PREFIX
-1:     lock
+672:   lock
        .section .smp_locks,"a"
        .balign 4
-       .long 1b - .
+       .long 672b - .
        .previous
        .endm
 #else
index 1a6c09a..3ab9bdd 100644 (file)
@@ -176,6 +176,7 @@ static inline u64 native_x2apic_icr_read(void)
 }
 
 extern int x2apic_phys;
+extern int x2apic_preenabled;
 extern void check_x2apic(void);
 extern void enable_x2apic(void);
 extern void x2apic_icr_write(u32 low, u32 id);
@@ -198,6 +199,9 @@ static inline void x2apic_force_phys(void)
        x2apic_phys = 1;
 }
 #else
+static inline void disable_x2apic(void)
+{
+}
 static inline void check_x2apic(void)
 {
 }
@@ -212,6 +216,7 @@ static inline void x2apic_force_phys(void)
 {
 }
 
+#define        nox2apic        0
 #define        x2apic_preenabled 0
 #define        x2apic_supported()      0
 #endif
@@ -410,6 +415,7 @@ extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip);
 #endif
 
 #ifdef CONFIG_X86_LOCAL_APIC
+
 static inline u32 apic_read(u32 reg)
 {
        return apic->read(reg);
diff --git a/arch/x86/include/asm/apic_flat_64.h b/arch/x86/include/asm/apic_flat_64.h
new file mode 100644 (file)
index 0000000..a2d3127
--- /dev/null
@@ -0,0 +1,7 @@
+#ifndef _ASM_X86_APIC_FLAT_64_H
+#define _ASM_X86_APIC_FLAT_64_H
+
+extern void flat_init_apic_ldr(void);
+
+#endif
+
index 3925d80..134bba0 100644 (file)
 
 #define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
 #define APIC_BASE_MSR  0x800
+#define XAPIC_ENABLE   (1UL << 11)
 #define X2APIC_ENABLE  (1UL << 10)
 
 #ifdef CONFIG_X86_32
index 1775d6e..b97596e 100644 (file)
@@ -380,6 +380,8 @@ static inline unsigned long __fls(unsigned long word)
        return word;
 }
 
+#undef ADDR
+
 #ifdef __KERNEL__
 /**
  * ffs - find first set bit in word
@@ -395,10 +397,25 @@ static inline unsigned long __fls(unsigned long word)
 static inline int ffs(int x)
 {
        int r;
-#ifdef CONFIG_X86_CMOV
+
+#ifdef CONFIG_X86_64
+       /*
+        * AMD64 says BSFL won't clobber the dest reg if x==0; Intel64 says the
+        * dest reg is undefined if x==0, but Intel's CPU architects have
+        * confirmed that it is rewritten with its previous value, except
+        * that the top 32 bits will be cleared.
+        *
+        * We cannot do this on 32 bits because at the very least some
+        * 486 CPUs did not behave this way.
+        */
+       long tmp = -1;
+       asm("bsfl %1,%0"
+           : "=r" (r)
+           : "rm" (x), "0" (tmp));
+#elif defined(CONFIG_X86_CMOV)
        asm("bsfl %1,%0\n\t"
            "cmovzl %2,%0"
-           : "=r" (r) : "rm" (x), "r" (-1));
+           : "=&r" (r) : "rm" (x), "r" (-1));
 #else
        asm("bsfl %1,%0\n\t"
            "jnz 1f\n\t"
@@ -422,7 +439,22 @@ static inline int ffs(int x)
 static inline int fls(int x)
 {
        int r;
-#ifdef CONFIG_X86_CMOV
+
+#ifdef CONFIG_X86_64
+       /*
+        * AMD64 says BSRL won't clobber the dest reg if x==0; Intel64 says the
+        * dest reg is undefined if x==0, but Intel's CPU architects have
+        * confirmed that it is rewritten with its previous value, except
+        * that the top 32 bits will be cleared.
+        *
+        * We cannot do this on 32 bits because at the very least some
+        * 486 CPUs did not behave this way.
+        */
+       long tmp = -1;
+       asm("bsrl %1,%0"
+           : "=r" (r)
+           : "rm" (x), "0" (tmp));
+#elif defined(CONFIG_X86_CMOV)
        asm("bsrl %1,%0\n\t"
            "cmovzl %2,%0"
            : "=&r" (r) : "rm" (x), "rm" (-1));
@@ -434,11 +466,35 @@ static inline int fls(int x)
 #endif
        return r + 1;
 }
-#endif /* __KERNEL__ */
-
-#undef ADDR
 
-#ifdef __KERNEL__
+/**
+ * fls64 - find last set bit in a 64-bit word
+ * @x: the word to search
+ *
+ * This is defined in a similar way to the libc and compiler builtin
+ * ffsll, but returns the position of the most significant set bit.
+ *
+ * fls64(value) returns 0 if value is 0 or the position of the last
+ * set bit if value is nonzero. The last (most significant) bit is
+ * at position 64.
+ */
+#ifdef CONFIG_X86_64
+static __always_inline int fls64(__u64 x)
+{
+       long bitpos = -1;
+       /*
+        * AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the
+        * dest reg is undefined if x==0, but Intel's CPU architects have
+        * confirmed that it is rewritten with its previous value.
+        */
+       asm("bsrq %1,%0"
+           : "+r" (bitpos)
+           : "rm" (x));
+       return bitpos + 1;
+}
+#else
+#include <asm-generic/bitops/fls64.h>
+#endif
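A few sanity values implied by the comment above (illustrative):

    /*
     * fls64(0)          == 0
     * fls64(1)          == 1   (bit 0 set -> position 1)
     * fls64(1ULL << 63) == 64  (most significant bit)
     */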
 
 #include <asm-generic/bitops/find.h>
 
@@ -450,12 +506,6 @@ static inline int fls(int x)
 
 #include <asm-generic/bitops/const_hweight.h>
 
-#endif /* __KERNEL__ */
-
-#include <asm-generic/bitops/fls64.h>
-
-#ifdef __KERNEL__
-
 #include <asm-generic/bitops/le.h>
 
 #include <asm-generic/bitops/ext2-atomic-setbit.h>
index 5d3acdf..0c9fa27 100644 (file)
@@ -14,6 +14,8 @@ extern void __cmpxchg_wrong_size(void)
        __compiletime_error("Bad argument size for cmpxchg");
 extern void __xadd_wrong_size(void)
        __compiletime_error("Bad argument size for xadd");
+extern void __add_wrong_size(void)
+       __compiletime_error("Bad argument size for add");
 
 /*
  * Constants for operation sizes. On 32-bit, the 64-bit size is set to
@@ -31,60 +33,47 @@ extern void __xadd_wrong_size(void)
 #define        __X86_CASE_Q    -1              /* sizeof will never return -1 */
 #endif
 
+/*
+ * An exchange-type operation, which takes a value and a pointer, and
+ * returns the old value.
+ */
+#define __xchg_op(ptr, arg, op, lock)                                  \
+       ({                                                              \
+               __typeof__ (*(ptr)) __ret = (arg);                      \
+               switch (sizeof(*(ptr))) {                               \
+               case __X86_CASE_B:                                      \
+                       asm volatile (lock #op "b %b0, %1\n"            \
+                                     : "+r" (__ret), "+m" (*(ptr))     \
+                                     : : "memory", "cc");              \
+                       break;                                          \
+               case __X86_CASE_W:                                      \
+                       asm volatile (lock #op "w %w0, %1\n"            \
+                                     : "+r" (__ret), "+m" (*(ptr))     \
+                                     : : "memory", "cc");              \
+                       break;                                          \
+               case __X86_CASE_L:                                      \
+                       asm volatile (lock #op "l %0, %1\n"             \
+                                     : "+r" (__ret), "+m" (*(ptr))     \
+                                     : : "memory", "cc");              \
+                       break;                                          \
+               case __X86_CASE_Q:                                      \
+                       asm volatile (lock #op "q %q0, %1\n"            \
+                                     : "+r" (__ret), "+m" (*(ptr))     \
+                                     : : "memory", "cc");              \
+                       break;                                          \
+               default:                                                \
+                       __ ## op ## _wrong_size();                      \
+               }                                                       \
+               __ret;                                                  \
+       })
+
 /*
  * Note: no "lock" prefix even on SMP: xchg always implies lock anyway.
  * Since this is generally used to protect other memory information, we
  * use "asm volatile" and "memory" clobbers to prevent gcc from moving
  * information around.
  */
-#define __xchg(x, ptr, size)                                           \
-({                                                                     \
-       __typeof(*(ptr)) __x = (x);                                     \
-       switch (size) {                                                 \
-       case __X86_CASE_B:                                              \
-       {                                                               \
-               volatile u8 *__ptr = (volatile u8 *)(ptr);              \
-               asm volatile("xchgb %0,%1"                              \
-                            : "=q" (__x), "+m" (*__ptr)                \
-                            : "0" (__x)                                \
-                            : "memory");                               \
-               break;                                                  \
-       }                                                               \
-       case __X86_CASE_W:                                              \
-       {                                                               \
-               volatile u16 *__ptr = (volatile u16 *)(ptr);            \
-               asm volatile("xchgw %0,%1"                              \
-                            : "=r" (__x), "+m" (*__ptr)                \
-                            : "0" (__x)                                \
-                            : "memory");                               \
-               break;                                                  \
-       }                                                               \
-       case __X86_CASE_L:                                              \
-       {                                                               \
-               volatile u32 *__ptr = (volatile u32 *)(ptr);            \
-               asm volatile("xchgl %0,%1"                              \
-                            : "=r" (__x), "+m" (*__ptr)                \
-                            : "0" (__x)                                \
-                            : "memory");                               \
-               break;                                                  \
-       }                                                               \
-       case __X86_CASE_Q:                                              \
-       {                                                               \
-               volatile u64 *__ptr = (volatile u64 *)(ptr);            \
-               asm volatile("xchgq %0,%1"                              \
-                            : "=r" (__x), "+m" (*__ptr)                \
-                            : "0" (__x)                                \
-                            : "memory");                               \
-               break;                                                  \
-       }                                                               \
-       default:                                                        \
-               __xchg_wrong_size();                                    \
-       }                                                               \
-       __x;                                                            \
-})
-
-#define xchg(ptr, v)                                                   \
-       __xchg((v), (ptr), sizeof(*ptr))
+#define xchg(ptr, v)   __xchg_op((ptr), (v), xchg, "")
 
 /*
  * Atomic compare and exchange.  Compare OLD with MEM, if identical,
@@ -165,46 +154,80 @@ extern void __xadd_wrong_size(void)
        __cmpxchg_local((ptr), (old), (new), sizeof(*ptr))
 #endif
 
-#define __xadd(ptr, inc, lock)                                         \
+/*
+ * xadd() adds "inc" to "*ptr" and atomically returns the previous
+ * value of "*ptr".
+ *
+ * xadd() is locked when multiple CPUs are online
+ * xadd_sync() is always locked
+ * xadd_local() is never locked
+ */
+#define __xadd(ptr, inc, lock) __xchg_op((ptr), (inc), xadd, lock)
+#define xadd(ptr, inc)         __xadd((ptr), (inc), LOCK_PREFIX)
+#define xadd_sync(ptr, inc)    __xadd((ptr), (inc), "lock; ")
+#define xadd_local(ptr, inc)   __xadd((ptr), (inc), "")
+
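A short usage sketch for xadd() (the counter variable is a placeholder; with
LOCK_PREFIX this is an atomic fetch-and-add):

    int counter = 5;
    int prev = xadd(&counter, 1);   /* counter becomes 6, prev == 5 */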
+#define __add(ptr, inc, lock)                                          \
        ({                                                              \
                __typeof__ (*(ptr)) __ret = (inc);                      \
                switch (sizeof(*(ptr))) {                               \
                case __X86_CASE_B:                                      \
-                       asm volatile (lock "xaddb %b0, %1\n"            \
-                                     : "+r" (__ret), "+m" (*(ptr))     \
-                                     : : "memory", "cc");              \
+                       asm volatile (lock "addb %b1, %0\n"             \
+                                     : "+m" (*(ptr)) : "ri" (inc)      \
+                                     : "memory", "cc");                \
                        break;                                          \
                case __X86_CASE_W:                                      \
-                       asm volatile (lock "xaddw %w0, %1\n"            \
-                                     : "+r" (__ret), "+m" (*(ptr))     \
-                                     : : "memory", "cc");              \
+                       asm volatile (lock "addw %w1, %0\n"             \
+                                     : "+m" (*(ptr)) : "ri" (inc)      \
+                                     : "memory", "cc");                \
                        break;                                          \
                case __X86_CASE_L:                                      \
-                       asm volatile (lock "xaddl %0, %1\n"             \
-                                     : "+r" (__ret), "+m" (*(ptr))     \
-                                     : : "memory", "cc");              \
+                       asm volatile (lock "addl %1, %0\n"              \
+                                     : "+m" (*(ptr)) : "ri" (inc)      \
+                                     : "memory", "cc");                \
                        break;                                          \
                case __X86_CASE_Q:                                      \
-                       asm volatile (lock "xaddq %q0, %1\n"            \
-                                     : "+r" (__ret), "+m" (*(ptr))     \
-                                     : : "memory", "cc");              \
+                       asm volatile (lock "addq %1, %0\n"              \
+                                     : "+m" (*(ptr)) : "ri" (inc)      \
+                                     : "memory", "cc");                \
                        break;                                          \
                default:                                                \
-                       __xadd_wrong_size();                            \
+                       __add_wrong_size();                             \
                }                                                       \
                __ret;                                                  \
        })
 
 /*
- * xadd() adds "inc" to "*ptr" and atomically returns the previous
- * value of "*ptr".
+ * add_*() adds "inc" to "*ptr"
  *
- * xadd() is locked when multiple CPUs are online
- * xadd_sync() is always locked
- * xadd_local() is never locked
+ * __add() takes a lock prefix
+ * add_smp() is locked when multiple CPUs are online
+ * add_sync() is always locked
  */
-#define xadd(ptr, inc)         __xadd((ptr), (inc), LOCK_PREFIX)
-#define xadd_sync(ptr, inc)    __xadd((ptr), (inc), "lock; ")
-#define xadd_local(ptr, inc)   __xadd((ptr), (inc), "")
+#define add_smp(ptr, inc)      __add((ptr), (inc), LOCK_PREFIX)
+#define add_sync(ptr, inc)     __add((ptr), (inc), "lock; ")
+
+#define __cmpxchg_double(pfx, p1, p2, o1, o2, n1, n2)                  \
+({                                                                     \
+       bool __ret;                                                     \
+       __typeof__(*(p1)) __old1 = (o1), __new1 = (n1);                 \
+       __typeof__(*(p2)) __old2 = (o2), __new2 = (n2);                 \
+       BUILD_BUG_ON(sizeof(*(p1)) != sizeof(long));                    \
+       BUILD_BUG_ON(sizeof(*(p2)) != sizeof(long));                    \
+       VM_BUG_ON((unsigned long)(p1) % (2 * sizeof(long)));            \
+       VM_BUG_ON((unsigned long)((p1) + 1) != (unsigned long)(p2));    \
+       asm volatile(pfx "cmpxchg%c4b %2; sete %0"                      \
+                    : "=a" (__ret), "+d" (__old2),                     \
+                      "+m" (*(p1)), "+m" (*(p2))                       \
+                    : "i" (2 * sizeof(long)), "a" (__old1),            \
+                      "b" (__new1), "c" (__new2));                     \
+       __ret;                                                          \
+})
+
+#define cmpxchg_double(p1, p2, o1, o2, n1, n2) \
+       __cmpxchg_double(LOCK_PREFIX, p1, p2, o1, o2, n1, n2)
+
+#define cmpxchg_double_local(p1, p2, o1, o2, n1, n2) \
+       __cmpxchg_double(, p1, p2, o1, o2, n1, n2)
 
 #endif /* ASM_X86_CMPXCHG_H */
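A hedged sketch of a cmpxchg_double() caller, honoring the constraints the
macro checks (two adjacent long-sized words, aligned to 2*sizeof(long); all
names are placeholders):

    struct pair {
            unsigned long low;
            unsigned long high;
    } __attribute__((aligned(2 * sizeof(long))));

    /* true iff *p still held {ol, oh} and was atomically replaced by {nl, nh} */
    static bool update_pair(struct pair *p,
                            unsigned long ol, unsigned long oh,
                            unsigned long nl, unsigned long nh)
    {
            return cmpxchg_double(&p->low, &p->high, ol, oh, nl, nh);
    }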
index fbebb07..53f4b21 100644 (file)
@@ -166,52 +166,6 @@ static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old,
 
 #endif
 
-#define cmpxchg8b(ptr, o1, o2, n1, n2)                         \
-({                                                             \
-       char __ret;                                             \
-       __typeof__(o2) __dummy;                                 \
-       __typeof__(*(ptr)) __old1 = (o1);                       \
-       __typeof__(o2) __old2 = (o2);                           \
-       __typeof__(*(ptr)) __new1 = (n1);                       \
-       __typeof__(o2) __new2 = (n2);                           \
-       asm volatile(LOCK_PREFIX "cmpxchg8b %2; setz %1"        \
-                      : "=d"(__dummy), "=a" (__ret), "+m" (*ptr)\
-                      : "a" (__old1), "d"(__old2),             \
-                        "b" (__new1), "c" (__new2)             \
-                      : "memory");                             \
-       __ret; })
-
-
-#define cmpxchg8b_local(ptr, o1, o2, n1, n2)                   \
-({                                                             \
-       char __ret;                                             \
-       __typeof__(o2) __dummy;                                 \
-       __typeof__(*(ptr)) __old1 = (o1);                       \
-       __typeof__(o2) __old2 = (o2);                           \
-       __typeof__(*(ptr)) __new1 = (n1);                       \
-       __typeof__(o2) __new2 = (n2);                           \
-       asm volatile("cmpxchg8b %2; setz %1"                    \
-                      : "=d"(__dummy), "=a"(__ret), "+m" (*ptr)\
-                      : "a" (__old), "d"(__old2),              \
-                        "b" (__new1), "c" (__new2),            \
-                      : "memory");                             \
-       __ret; })
-
-
-#define cmpxchg_double(ptr, o1, o2, n1, n2)                            \
-({                                                                     \
-       BUILD_BUG_ON(sizeof(*(ptr)) != 4);                              \
-       VM_BUG_ON((unsigned long)(ptr) % 8);                            \
-       cmpxchg8b((ptr), (o1), (o2), (n1), (n2));                       \
-})
-
-#define cmpxchg_double_local(ptr, o1, o2, n1, n2)                      \
-({                                                                     \
-       BUILD_BUG_ON(sizeof(*(ptr)) != 4);                              \
-       VM_BUG_ON((unsigned long)(ptr) % 8);                            \
-       cmpxchg16b_local((ptr), (o1), (o2), (n1), (n2));                        \
-})
-
 #define system_has_cmpxchg_double() cpu_has_cx8
 
 #endif /* _ASM_X86_CMPXCHG_32_H */
index 285da02..614be87 100644 (file)
@@ -20,49 +20,6 @@ static inline void set_64bit(volatile u64 *ptr, u64 val)
        cmpxchg_local((ptr), (o), (n));                                 \
 })
 
-#define cmpxchg16b(ptr, o1, o2, n1, n2)                                \
-({                                                             \
-       char __ret;                                             \
-       __typeof__(o2) __junk;                                  \
-       __typeof__(*(ptr)) __old1 = (o1);                       \
-       __typeof__(o2) __old2 = (o2);                           \
-       __typeof__(*(ptr)) __new1 = (n1);                       \
-       __typeof__(o2) __new2 = (n2);                           \
-       asm volatile(LOCK_PREFIX "cmpxchg16b %2;setz %1"        \
-                      : "=d"(__junk), "=a"(__ret), "+m" (*ptr) \
-                      : "b"(__new1), "c"(__new2),              \
-                        "a"(__old1), "d"(__old2));             \
-       __ret; })
-
-
-#define cmpxchg16b_local(ptr, o1, o2, n1, n2)                  \
-({                                                             \
-       char __ret;                                             \
-       __typeof__(o2) __junk;                                  \
-       __typeof__(*(ptr)) __old1 = (o1);                       \
-       __typeof__(o2) __old2 = (o2);                           \
-       __typeof__(*(ptr)) __new1 = (n1);                       \
-       __typeof__(o2) __new2 = (n2);                           \
-       asm volatile("cmpxchg16b %2;setz %1"                    \
-                      : "=d"(__junk), "=a"(__ret), "+m" (*ptr) \
-                      : "b"(__new1), "c"(__new2),              \
-                        "a"(__old1), "d"(__old2));             \
-       __ret; })
-
-#define cmpxchg_double(ptr, o1, o2, n1, n2)                            \
-({                                                                     \
-       BUILD_BUG_ON(sizeof(*(ptr)) != 8);                              \
-       VM_BUG_ON((unsigned long)(ptr) % 16);                           \
-       cmpxchg16b((ptr), (o1), (o2), (n1), (n2));                      \
-})
-
-#define cmpxchg_double_local(ptr, o1, o2, n1, n2)                      \
-({                                                                     \
-       BUILD_BUG_ON(sizeof(*(ptr)) != 8);                              \
-       VM_BUG_ON((unsigned long)(ptr) % 16);                           \
-       cmpxchg16b_local((ptr), (o1), (o2), (n1), (n2));                \
-})
-
 #define system_has_cmpxchg_double() cpu_has_cx16
 
 #endif /* _ASM_X86_CMPXCHG_64_H */
index 9a2d644..ced283a 100644 (file)
@@ -4,6 +4,7 @@
 #ifdef CONFIG_X86_32
 
 #include <linux/types.h>
+#include <linux/log2.h>
 
 /*
  * do_div() is NOT a C function. It wants to return
 ({                                                             \
        unsigned long __upper, __low, __high, __mod, __base;    \
        __base = (base);                                        \
-       asm("":"=a" (__low), "=d" (__high) : "A" (n));          \
-       __upper = __high;                                       \
-       if (__high) {                                           \
-               __upper = __high % (__base);                    \
-               __high = __high / (__base);                     \
+       if (__builtin_constant_p(__base) && is_power_of_2(__base)) { \
+               __mod = n & (__base - 1);                       \
+               n >>= ilog2(__base);                            \
+       } else {                                                \
+               asm("" : "=a" (__low), "=d" (__high) : "A" (n));\
+               __upper = __high;                               \
+               if (__high) {                                   \
+                       __upper = __high % (__base);            \
+                       __high = __high / (__base);             \
+               }                                               \
+               asm("divl %2" : "=a" (__low), "=d" (__mod)      \
+                       : "rm" (__base), "0" (__low), "1" (__upper));   \
+               asm("" : "=A" (n) : "a" (__low), "d" (__high)); \
        }                                                       \
-       asm("divl %2":"=a" (__low), "=d" (__mod)                \
-           : "rm" (__base), "0" (__low), "1" (__upper));       \
-       asm("":"=A" (n) : "a" (__low), "d" (__high));           \
        __mod;                                                  \
 })
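
The fast path added above relies on an identity worth spelling out: for
base == 1 << k, n % base equals n & (base - 1) and n / base equals n >> k.
A standalone illustration (not kernel code):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t n = 1000000007ULL;
            uint32_t base = 4096;                   /* 1 << 12 */
            uint32_t rem = n & (base - 1);          /* n % base */
            uint64_t quot = n >> 12;                /* n / base */

            /* prints: 1000000007 = 244140 * 4096 + 2567 */
            printf("%llu = %llu * %u + %u\n", (unsigned long long)n,
                   (unsigned long long)quot, base, rem);
            return 0;
    }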
 
index 908b969..3778256 100644 (file)
@@ -117,7 +117,7 @@ static inline void early_memtest(unsigned long start, unsigned long end)
 
 extern unsigned long e820_end_of_ram_pfn(void);
 extern unsigned long e820_end_of_low_ram_pfn(void);
-extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);
+extern u64 early_reserve_e820(u64 sizet, u64 align);
 
 void memblock_x86_fill(void);
 void memblock_find_dma_reserve(void);
index 55e4de6..da0b3ca 100644 (file)
@@ -11,6 +11,7 @@ typedef struct {
 #ifdef CONFIG_X86_LOCAL_APIC
        unsigned int apic_timer_irqs;   /* arch dependent */
        unsigned int irq_spurious_count;
+       unsigned int icr_read_retry_count;
 #endif
        unsigned int x86_platform_ipis; /* arch dependent */
        unsigned int apic_perf_irqs;
index c9e09ea..6919e93 100644 (file)
@@ -218,7 +218,7 @@ static inline void fpu_fxsave(struct fpu *fpu)
 #ifdef CONFIG_SMP
 #define safe_address (__per_cpu_offset[0])
 #else
-#define safe_address (kstat_cpu(0).cpustat.user)
+#define safe_address (__get_cpu_var(kernel_cpustat).cpustat[CPUTIME_USER])
 #endif
 
 /*
index 88c765e..74df3f1 100644 (file)
@@ -137,6 +137,13 @@ static inline int insn_is_avx(struct insn *insn)
        return (insn->vex_prefix.value != 0);
 }
 
+/* Ensure this instruction is decoded completely */
+static inline int insn_complete(struct insn *insn)
+{
+       return insn->opcode.got && insn->modrm.got && insn->sib.got &&
+               insn->displacement.got && insn->immediate.got;
+}
+
 static inline insn_byte_t insn_vex_m_bits(struct insn *insn)
 {
        if (insn->vex_prefix.nbytes == 2)       /* 2 bytes VEX */
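
A sketch of how a caller pairs insn_complete() with the decoder. The
insn_init()/insn_get_length() signatures below are the ones in
arch/x86/lib/insn.c of this era and are an assumption, not a stable API;
demo_decode() is invented:

    #include <linux/errno.h>
    #include <asm/insn.h>

    /* Decode one instruction at kaddr; fail if the decoder could not
     * populate every field (for example, a truncated buffer). */
    static int demo_decode(struct insn *insn, const void *kaddr)
    {
            insn_init(insn, kaddr, 1);      /* 1 = decode as 64-bit code */
            insn_get_length(insn);          /* decodes opcode/modrm/sib/... */

            return insn_complete(insn) ? 0 : -EINVAL;
    }
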
index 8537285..88d0c3c 100644 (file)
@@ -15,7 +15,7 @@
 
 #define CALIBRATE_TIME_MSEC 30 /* 30 msecs */
 #define CALIBRATE_LATCH        \
-       ((CLOCK_TICK_RATE * CALIBRATE_TIME_MSEC + 1000/2)/1000)
+       ((PIT_TICK_RATE * CALIBRATE_TIME_MSEC + 1000/2)/1000)
 
 static inline void mach_prepare_counter(void)
 {
index 01fdf56..0e8e85b 100644 (file)
@@ -81,8 +81,8 @@ static inline unsigned char current_lock_cmos_reg(void)
 #else
 #define lock_cmos_prefix(reg) do {} while (0)
 #define lock_cmos_suffix(reg) do {} while (0)
-#define lock_cmos(reg)
-#define unlock_cmos()
+#define lock_cmos(reg) do { } while (0)
+#define unlock_cmos() do { } while (0)
 #define do_i_have_lock_cmos() 0
 #define current_lock_cmos_reg() 0
 #endif
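
The move from empty bodies to do { } while (0) is the standard
statement-macro idiom. A minimal illustration with made-up macros (not the
CMOS ones): an empty expansion leaves a bare ";" behind, which trips gcc's
-Wempty-body and makes the macro behave differently from a function call,
while the do/while form is one real statement that demands its trailing
semicolon.

    #define nop_empty()
    #define nop_stmt()      do { } while (0)

    static void demo(int cond)
    {
            if (cond)
                    nop_stmt();     /* one statement, like a call */
            else
                    nop_stmt();     /* binds exactly as it reads */
    }
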
index 0e8ae57..6add827 100644 (file)
 #define MCJ_CTX_MASK           3
 #define MCJ_CTX(flags)         ((flags) & MCJ_CTX_MASK)
 #define MCJ_CTX_RANDOM         0    /* inject context: random */
-#define MCJ_CTX_PROCESS                1    /* inject context: process */
-#define MCJ_CTX_IRQ            2    /* inject context: IRQ */
-#define MCJ_NMI_BROADCAST      4    /* do NMI broadcasting */
-#define MCJ_EXCEPTION          8    /* raise as exception */
+#define MCJ_CTX_PROCESS                0x1  /* inject context: process */
+#define MCJ_CTX_IRQ            0x2  /* inject context: IRQ */
+#define MCJ_NMI_BROADCAST      0x4  /* do NMI broadcasting */
+#define MCJ_EXCEPTION          0x8  /* raise as exception */
+#define MCJ_IRQ_BRAODCAST      0x10 /* do IRQ broadcasting */
 
 /* Fields are zero when not available */
 struct mce {
@@ -120,7 +121,8 @@ struct mce_log {
 
 #ifdef __KERNEL__
 
-extern struct atomic_notifier_head x86_mce_decoder_chain;
+extern void mce_register_decode_chain(struct notifier_block *nb);
+extern void mce_unregister_decode_chain(struct notifier_block *nb);
 
 #include <linux/percpu.h>
 #include <linux/init.h>
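
A sketch of a consumer of the new register/unregister interface (an
EDAC-style decoder, say). The demo_* names are invented; the notifier shape
is the generic one from <linux/notifier.h>:

    #include <linux/kernel.h>
    #include <linux/module.h>
    #include <linux/notifier.h>
    #include <asm/mce.h>

    static int demo_decode_mce(struct notifier_block *nb, unsigned long val,
                               void *data)
    {
            struct mce *m = data;

            pr_info("MCE on CPU %d, status 0x%llx\n", m->extcpu,
                    (unsigned long long)m->status);
            return NOTIFY_OK;
    }

    static struct notifier_block demo_mce_nb = {
            .notifier_call  = demo_decode_mce,
    };

    static int __init demo_init(void)
    {
            mce_register_decode_chain(&demo_mce_nb);
            return 0;
    }

    static void __exit demo_exit(void)
    {
            mce_unregister_decode_chain(&demo_mce_nb);
    }
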
diff --git a/arch/x86/include/asm/memblock.h b/arch/x86/include/asm/memblock.h
deleted file mode 100644 (file)
index 0cd3800..0000000
+++ /dev/null
@@ -1,23 +0,0 @@
-#ifndef _X86_MEMBLOCK_H
-#define _X86_MEMBLOCK_H
-
-#define ARCH_DISCARD_MEMBLOCK
-
-u64 memblock_x86_find_in_range_size(u64 start, u64 *sizep, u64 align);
-
-void memblock_x86_reserve_range(u64 start, u64 end, char *name);
-void memblock_x86_free_range(u64 start, u64 end);
-struct range;
-int __get_free_all_memory_range(struct range **range, int nodeid,
-                        unsigned long start_pfn, unsigned long end_pfn);
-int get_free_all_memory_range(struct range **rangep, int nodeid);
-
-void memblock_x86_register_active_regions(int nid, unsigned long start_pfn,
-                                        unsigned long last_pfn);
-u64 memblock_x86_hole_size(u64 start, u64 end);
-u64 memblock_x86_find_in_range_node(int nid, u64 start, u64 end, u64 size, u64 align);
-u64 memblock_x86_free_memory_in_range(u64 addr, u64 limit);
-u64 memblock_x86_memory_in_range(u64 addr, u64 limit);
-bool memblock_x86_check_reserved_size(u64 *addrp, u64 *sizep, u64 align);
-
-#endif
index 2421507..4ebe157 100644 (file)
@@ -48,6 +48,7 @@ static inline struct microcode_ops * __init init_intel_microcode(void)
 
 #ifdef CONFIG_MICROCODE_AMD
 extern struct microcode_ops * __init init_amd_microcode(void);
+extern void __exit exit_amd_microcode(void);
 
 static inline void get_ucode_data(void *to, const u8 *from, size_t n)
 {
@@ -59,6 +60,7 @@ static inline struct microcode_ops * __init init_amd_microcode(void)
 {
        return NULL;
 }
+static inline void __exit exit_amd_microcode(void) {}
 #endif
 
 #endif /* _ASM_X86_MICROCODE_H */
diff --git a/arch/x86/include/asm/numachip/numachip_csr.h b/arch/x86/include/asm/numachip/numachip_csr.h
new file mode 100644 (file)
index 0000000..660f843
--- /dev/null
@@ -0,0 +1,167 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Numascale NumaConnect-Specific Header file
+ *
+ * Copyright (C) 2011 Numascale AS. All rights reserved.
+ *
+ * Send feedback to <support@numascale.com>
+ *
+ */
+
+#ifndef _ASM_X86_NUMACHIP_NUMACHIP_CSR_H
+#define _ASM_X86_NUMACHIP_NUMACHIP_CSR_H
+
+#include <linux/numa.h>
+#include <linux/percpu.h>
+#include <linux/io.h>
+#include <linux/swab.h>
+#include <asm/types.h>
+#include <asm/processor.h>
+
+#define CSR_NODE_SHIFT         16
+#define CSR_NODE_BITS(p)       (((unsigned long)(p)) << CSR_NODE_SHIFT)
+#define CSR_NODE_MASK          0x0fff          /* 4K nodes */
+
+/* 32K CSR space, b15 indicates geo/non-geo */
+#define CSR_OFFSET_MASK        0x7fffUL
+
+/* Global CSR space covers all 4K possible nodes with 64K CSR space per node */
+#define NUMACHIP_GCSR_BASE     0x3fff00000000ULL
+#define NUMACHIP_GCSR_LIM      0x3fff0fffffffULL
+#define NUMACHIP_GCSR_SIZE     (NUMACHIP_GCSR_LIM - NUMACHIP_GCSR_BASE + 1)
+
+/*
+ * Local CSR space starts in global CSR space with "nodeid" = 0xfff0; however,
+ * when using the direct mapping on x86_64, both start and size need to be
+ * aligned to PMD_SIZE, which is 2M
+ */
+#define NUMACHIP_LCSR_BASE     0x3ffffe000000ULL
+#define NUMACHIP_LCSR_LIM      0x3fffffffffffULL
+#define NUMACHIP_LCSR_SIZE     (NUMACHIP_LCSR_LIM - NUMACHIP_LCSR_BASE + 1)
+
+static inline void *gcsr_address(int node, unsigned long offset)
+{
+       return __va(NUMACHIP_GCSR_BASE | (1UL << 15) |
+               CSR_NODE_BITS(node & CSR_NODE_MASK) | (offset & CSR_OFFSET_MASK));
+}
+
+static inline void *lcsr_address(unsigned long offset)
+{
+       return __va(NUMACHIP_LCSR_BASE | (1UL << 15) |
+               CSR_NODE_BITS(0xfff0) | (offset & CSR_OFFSET_MASK));
+}
+
+static inline unsigned int read_gcsr(int node, unsigned long offset)
+{
+       return swab32(readl(gcsr_address(node, offset)));
+}
+
+static inline void write_gcsr(int node, unsigned long offset, unsigned int val)
+{
+       writel(swab32(val), gcsr_address(node, offset));
+}
+
+static inline unsigned int read_lcsr(unsigned long offset)
+{
+       return swab32(readl(lcsr_address(offset)));
+}
+
+static inline void write_lcsr(unsigned long offset, unsigned int val)
+{
+       writel(swab32(val), lcsr_address(offset));
+}
+
+/* ========================================================================= */
+/*                   CSR_G0_STATE_CLEAR                                      */
+/* ========================================================================= */
+
+#define CSR_G0_STATE_CLEAR (0x000 + (0 << 12))
+union numachip_csr_g0_state_clear {
+       unsigned int v;
+       struct numachip_csr_g0_state_clear_s {
+               unsigned int _state:2;
+               unsigned int _rsvd_2_6:5;
+               unsigned int _lost:1;
+               unsigned int _rsvd_8_31:24;
+       } s;
+};
+
+/* ========================================================================= */
+/*                   CSR_G0_NODE_IDS                                         */
+/* ========================================================================= */
+
+#define CSR_G0_NODE_IDS (0x008 + (0 << 12))
+union numachip_csr_g0_node_ids {
+       unsigned int v;
+       struct numachip_csr_g0_node_ids_s {
+               unsigned int _initialid:16;
+               unsigned int _nodeid:12;
+               unsigned int _rsvd_28_31:4;
+       } s;
+};
+
+/* ========================================================================= */
+/*                   CSR_G3_EXT_IRQ_GEN                                      */
+/* ========================================================================= */
+
+#define CSR_G3_EXT_IRQ_GEN (0x030 + (3 << 12))
+union numachip_csr_g3_ext_irq_gen {
+       unsigned int v;
+       struct numachip_csr_g3_ext_irq_gen_s {
+               unsigned int _vector:8;
+               unsigned int _msgtype:3;
+               unsigned int _index:5;
+               unsigned int _destination_apic_id:16;
+       } s;
+};
+
+/* ========================================================================= */
+/*                   CSR_G3_EXT_IRQ_STATUS                                   */
+/* ========================================================================= */
+
+#define CSR_G3_EXT_IRQ_STATUS (0x034 + (3 << 12))
+union numachip_csr_g3_ext_irq_status {
+       unsigned int v;
+       struct numachip_csr_g3_ext_irq_status_s {
+               unsigned int _result:32;
+       } s;
+};
+
+/* ========================================================================= */
+/*                   CSR_G3_EXT_IRQ_DEST                                     */
+/* ========================================================================= */
+
+#define CSR_G3_EXT_IRQ_DEST (0x038 + (3 << 12))
+union numachip_csr_g3_ext_irq_dest {
+       unsigned int v;
+       struct numachip_csr_g3_ext_irq_dest_s {
+               unsigned int _irq:8;
+               unsigned int _rsvd_8_31:24;
+       } s;
+};
+
+/* ========================================================================= */
+/*                   CSR_G3_NC_ATT_MAP_SELECT                                */
+/* ========================================================================= */
+
+#define CSR_G3_NC_ATT_MAP_SELECT (0x7fc + (3 << 12))
+union numachip_csr_g3_nc_att_map_select {
+       unsigned int v;
+       struct numachip_csr_g3_nc_att_map_select_s {
+               unsigned int _upper_address_bits:4;
+               unsigned int _select_ram:4;
+               unsigned int _rsvd_8_31:24;
+       } s;
+};
+
+/* ========================================================================= */
+/*                   CSR_G3_NC_ATT_MAP_SELECT_0-255                          */
+/* ========================================================================= */
+
+#define CSR_G3_NC_ATT_MAP_SELECT_0 (0x800 + (3 << 12))
+
+#endif /* _ASM_X86_NUMACHIP_NUMACHIP_CSR_H */
+
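
A sketch of the intended access pattern: read a CSR through the byte-swapping
accessor, then take the bitfields apart via the matching union. It assumes
the CSR range is already mapped, as the map_csrs() added later in this merge
does; demo_local_nodeid() is invented:

    #include <asm/numachip/numachip_csr.h>

    static unsigned int demo_local_nodeid(void)
    {
            union numachip_csr_g0_node_ids ids;

            ids.v = read_lcsr(CSR_G0_NODE_IDS);  /* byte-swapped MMIO read */

            return ids.s._nodeid;                /* 12-bit field, bits 16..27 */
    }
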
index 3470c9d..529bf07 100644 (file)
@@ -451,23 +451,20 @@ do {                                                                      \
 #endif /* !CONFIG_M386 */
 
 #ifdef CONFIG_X86_CMPXCHG64
-#define percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)                  \
+#define percpu_cmpxchg8b_double(pcp1, pcp2, o1, o2, n1, n2)            \
 ({                                                                     \
-       char __ret;                                                     \
-       typeof(o1) __o1 = o1;                                           \
-       typeof(o1) __n1 = n1;                                           \
-       typeof(o2) __o2 = o2;                                           \
-       typeof(o2) __n2 = n2;                                           \
-       typeof(o2) __dummy = n2;                                        \
+       bool __ret;                                                     \
+       typeof(pcp1) __o1 = (o1), __n1 = (n1);                          \
+       typeof(pcp2) __o2 = (o2), __n2 = (n2);                          \
        asm volatile("cmpxchg8b "__percpu_arg(1)"\n\tsetz %0\n\t"       \
-                   : "=a"(__ret), "=m" (pcp1), "=d"(__dummy)           \
-                   :  "b"(__n1), "c"(__n2), "a"(__o1), "d"(__o2));     \
+                   : "=a" (__ret), "+m" (pcp1), "+m" (pcp2), "+d" (__o2) \
+                   :  "b" (__n1), "c" (__n2), "a" (__o1));             \
        __ret;                                                          \
 })
 
-#define __this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2)                percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)
-#define this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2)          percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)
-#define irqsafe_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2)       percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2)
+#define __this_cpu_cmpxchg_double_4    percpu_cmpxchg8b_double
+#define this_cpu_cmpxchg_double_4      percpu_cmpxchg8b_double
+#define irqsafe_cpu_cmpxchg_double_4   percpu_cmpxchg8b_double
 #endif /* CONFIG_X86_CMPXCHG64 */
 
 /*
@@ -508,31 +505,23 @@ do {                                                                      \
  * it in software.  The address used in the cmpxchg16 instruction must be
  * aligned to a 16 byte boundary.
  */
-#ifdef CONFIG_SMP
-#define CMPXCHG16B_EMU_CALL "call this_cpu_cmpxchg16b_emu\n\t" ASM_NOP3
-#else
-#define CMPXCHG16B_EMU_CALL "call this_cpu_cmpxchg16b_emu\n\t" ASM_NOP2
-#endif
-#define percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)                 \
+#define percpu_cmpxchg16b_double(pcp1, pcp2, o1, o2, n1, n2)           \
 ({                                                                     \
-       char __ret;                                                     \
-       typeof(o1) __o1 = o1;                                           \
-       typeof(o1) __n1 = n1;                                           \
-       typeof(o2) __o2 = o2;                                           \
-       typeof(o2) __n2 = n2;                                           \
-       typeof(o2) __dummy;                                             \
-       alternative_io(CMPXCHG16B_EMU_CALL,                             \
-                      "cmpxchg16b " __percpu_prefix "(%%rsi)\n\tsetz %0\n\t",  \
+       bool __ret;                                                     \
+       typeof(pcp1) __o1 = (o1), __n1 = (n1);                          \
+       typeof(pcp2) __o2 = (o2), __n2 = (n2);                          \
+       alternative_io("leaq %P1,%%rsi\n\tcall this_cpu_cmpxchg16b_emu\n\t", \
+                      "cmpxchg16b " __percpu_arg(1) "\n\tsetz %0\n\t", \
                       X86_FEATURE_CX16,                                \
-                      ASM_OUTPUT2("=a"(__ret), "=d"(__dummy)),         \
-                      "S" (&pcp1), "b"(__n1), "c"(__n2),               \
-                      "a"(__o1), "d"(__o2) : "memory");                \
+                      ASM_OUTPUT2("=a" (__ret), "+m" (pcp1),           \
+                                  "+m" (pcp2), "+d" (__o2)),           \
+                      "b" (__n1), "c" (__n2), "a" (__o1) : "rsi");     \
        __ret;                                                          \
 })
 
-#define __this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2)                percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)
-#define this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2)          percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)
-#define irqsafe_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2)       percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2)
+#define __this_cpu_cmpxchg_double_8    percpu_cmpxchg16b_double
+#define this_cpu_cmpxchg_double_8      percpu_cmpxchg16b_double
+#define irqsafe_cpu_cmpxchg_double_8   percpu_cmpxchg16b_double
 
 #endif
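
A sketch of a caller of the reworked two-argument form (the SLUB allocator is
the real in-tree user). The demo_* names are invented; the two members must
be adjacent and suitably aligned:

    #include <linux/percpu.h>
    #include <linux/types.h>

    struct demo_pair {
            void            *ptr;
            unsigned long   seq;
    };
    static DEFINE_PER_CPU_ALIGNED(struct demo_pair, demo_pair);

    static bool demo_update(void *old_p, unsigned long old_s,
                            void *new_p, unsigned long new_s)
    {
            /* both words are named explicitly; no address is derived */
            return this_cpu_cmpxchg_double(demo_pair.ptr, demo_pair.seq,
                                           old_p, old_s, new_p, new_s);
    }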
 
index f61c62f..096c975 100644 (file)
@@ -57,6 +57,7 @@
                (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX))
 
 #define ARCH_PERFMON_BRANCH_MISSES_RETIRED             6
+#define ARCH_PERFMON_EVENTS_COUNT                      7
 
 /*
  * Intel "Architectural Performance Monitoring" CPUID
@@ -72,6 +73,19 @@ union cpuid10_eax {
        unsigned int full;
 };
 
+union cpuid10_ebx {
+       struct {
+               unsigned int no_unhalted_core_cycles:1;
+               unsigned int no_instructions_retired:1;
+               unsigned int no_unhalted_reference_cycles:1;
+               unsigned int no_llc_reference:1;
+               unsigned int no_llc_misses:1;
+               unsigned int no_branch_instruction_retired:1;
+               unsigned int no_branch_misses_retired:1;
+       } split;
+       unsigned int full;
+};
+
 union cpuid10_edx {
        struct {
                unsigned int num_counters_fixed:5;
@@ -81,6 +95,15 @@ union cpuid10_edx {
        unsigned int full;
 };
 
+struct x86_pmu_capability {
+       int             version;
+       int             num_counters_gp;
+       int             num_counters_fixed;
+       int             bit_width_gp;
+       int             bit_width_fixed;
+       unsigned int    events_mask;
+       int             events_mask_len;
+};
 
 /*
  * Fixed-purpose performance events:
@@ -89,23 +112,24 @@ union cpuid10_edx {
 /*
  * All 3 fixed-mode PMCs are configured via this single MSR:
  */
-#define MSR_ARCH_PERFMON_FIXED_CTR_CTRL                        0x38d
+#define MSR_ARCH_PERFMON_FIXED_CTR_CTRL        0x38d
 
 /*
  * The counts are available in three separate MSRs:
  */
 
 /* Instr_Retired.Any: */
-#define MSR_ARCH_PERFMON_FIXED_CTR0                    0x309
-#define X86_PMC_IDX_FIXED_INSTRUCTIONS                 (X86_PMC_IDX_FIXED + 0)
+#define MSR_ARCH_PERFMON_FIXED_CTR0    0x309
+#define X86_PMC_IDX_FIXED_INSTRUCTIONS (X86_PMC_IDX_FIXED + 0)
 
 /* CPU_CLK_Unhalted.Core: */
-#define MSR_ARCH_PERFMON_FIXED_CTR1                    0x30a
-#define X86_PMC_IDX_FIXED_CPU_CYCLES                   (X86_PMC_IDX_FIXED + 1)
+#define MSR_ARCH_PERFMON_FIXED_CTR1    0x30a
+#define X86_PMC_IDX_FIXED_CPU_CYCLES   (X86_PMC_IDX_FIXED + 1)
 
 /* CPU_CLK_Unhalted.Ref: */
-#define MSR_ARCH_PERFMON_FIXED_CTR2                    0x30b
-#define X86_PMC_IDX_FIXED_BUS_CYCLES                   (X86_PMC_IDX_FIXED + 2)
+#define MSR_ARCH_PERFMON_FIXED_CTR2    0x30b
+#define X86_PMC_IDX_FIXED_REF_CYCLES   (X86_PMC_IDX_FIXED + 2)
+#define X86_PMC_MSK_FIXED_REF_CYCLES   (1ULL << X86_PMC_IDX_FIXED_REF_CYCLES)
 
 /*
  * We model BTS tracing as another fixed-mode PMC.
@@ -202,6 +226,7 @@ struct perf_guest_switch_msr {
 };
 
 extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr);
+extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
 #else
 static inline struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
 {
@@ -209,6 +234,11 @@ static inline perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
        return NULL;
 }
 
+static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
+{
+       memset(cap, 0, sizeof(*cap));
+}
+
 static inline void perf_events_lapic_init(void)        { }
 #endif
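
The new unions map directly onto CPUID leaf 0xa. A standalone userspace peek
at the same bits, for illustration only:

    #include <cpuid.h>
    #include <stdio.h>

    int main(void)
    {
            unsigned int eax, ebx, ecx, edx;

            if (!__get_cpuid(0xa, &eax, &ebx, &ecx, &edx))
                    return 1;       /* no architectural perfmon leaf */

            /* EAX: version and counter geometry; EBX: one "event is
             * unavailable" bit per architectural event, 7 events now */
            printf("version %u, %u GP counters, events_mask %#x\n",
                   eax & 0xff, (eax >> 8) & 0xff, ebx & 0x7f);
            return 0;
    }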
 
index 18601c8..49afb3f 100644 (file)
@@ -703,7 +703,7 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm,
        pte_update(mm, addr, ptep);
 }
 
-#define flush_tlb_fix_spurious_fault(vma, address)
+#define flush_tlb_fix_spurious_fault(vma, address) do { } while (0)
 
 #define mk_pmd(page, pgprot)   pfn_pmd(page_to_pfn(page), (pgprot))
 
index 2dddb31..f8ab3ea 100644 (file)
@@ -6,6 +6,7 @@
  * EFLAGS bits
  */
 #define X86_EFLAGS_CF  0x00000001 /* Carry Flag */
+#define X86_EFLAGS_BIT1        0x00000002 /* Bit 1 - always on */
 #define X86_EFLAGS_PF  0x00000004 /* Parity Flag */
 #define X86_EFLAGS_AF  0x00000010 /* Auxiliary carry Flag */
 #define X86_EFLAGS_ZF  0x00000040 /* Zero Flag */
index b650435..aa9088c 100644 (file)
@@ -99,7 +99,6 @@ struct cpuinfo_x86 {
        u16                     apicid;
        u16                     initial_apicid;
        u16                     x86_clflush_size;
-#ifdef CONFIG_SMP
        /* number of cores as seen by the OS: */
        u16                     booted_cores;
        /* Physical processor id: */
@@ -110,7 +109,6 @@ struct cpuinfo_x86 {
        u8                      compute_unit_id;
        /* Index into per_cpu list: */
        u16                     cpu_index;
-#endif
        u32                     microcode;
 } __attribute__((__aligned__(SMP_CACHE_BYTES)));
 
index 972c260..a82c2bf 100644 (file)
@@ -79,23 +79,10 @@ static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
        return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail;
 }
 
-#if (NR_CPUS < 256)
 static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
 {
-       asm volatile(UNLOCK_LOCK_PREFIX "incb %0"
-                    : "+m" (lock->head_tail)
-                    :
-                    : "memory", "cc");
+       __add(&lock->tickets.head, 1, UNLOCK_LOCK_PREFIX);
 }
-#else
-static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
-{
-       asm volatile(UNLOCK_LOCK_PREFIX "incw %0"
-                    : "+m" (lock->head_tail)
-                    :
-                    : "memory", "cc");
-}
-#endif
 
 static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
 {
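
For reference, the head/tail mechanics behind the one-line unlock, as a
standalone userspace sketch (C11 atomics, not the kernel's arch_spinlock_t):
lock takes a ticket from tail, unlock advances head, which is exactly the
increment that __add() now performs at the right operand width.

    #include <stdatomic.h>

    struct ticketlock {
            atomic_uint head;       /* ticket currently being served */
            atomic_uint tail;       /* next ticket to hand out */
    };

    static void ticket_lock(struct ticketlock *l)
    {
            unsigned int me = atomic_fetch_add(&l->tail, 1);

            while (atomic_load(&l->head) != me)
                    ;               /* spin until our number comes up */
    }

    static void ticket_unlock(struct ticketlock *l)
    {
            atomic_fetch_add(&l->head, 1);  /* the "add 1 to head" above */
    }
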
index a1fe5c1..185b719 100644 (file)
@@ -40,7 +40,8 @@ struct thread_info {
                                                */
        __u8                    supervisor_stack[0];
 #endif
-       int                     uaccess_err;
+       int                     sig_on_uaccess_error:1;
+       int                     uaccess_err:1;  /* uaccess failed */
 };
 
 #define INIT_THREAD_INFO(tsk)                  \
@@ -231,6 +232,12 @@ static inline struct thread_info *current_thread_info(void)
        movq PER_CPU_VAR(kernel_stack),reg ; \
        subq $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg
 
+/*
+ * Same if PER_CPU_VAR(kernel_stack) is, perhaps with some offset, already in
+ * a certain register (to be used in assembler memory operands).
+ */
+#define THREAD_INFO(reg, off) KERNEL_STACK_OFFSET+(off)-THREAD_SIZE(reg)
+
 #endif
 
 #endif /* !X86_32 */
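
One caveat with the bitfields just introduced: with gcc, a plain "int x:1"
field is signed and stores 0 or -1, so callers must test it for truth rather
than compare it with 1. A standalone illustration:

    #include <stdio.h>

    struct flags {
            int uaccess_err:1;      /* signed one-bit field: 0 or -1 */
    };

    int main(void)
    {
            struct flags f = { .uaccess_err = 1 };

            printf("%d\n", f.uaccess_err);          /* -1 */
            printf("%d\n", f.uaccess_err == 1);     /* 0: compare fails */
            printf("%d\n", !!f.uaccess_err);        /* 1: truth test is safe */
            return 0;
    }
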
index c006924..800f77c 100644 (file)
@@ -130,10 +130,8 @@ extern void setup_node_to_cpumask_map(void);
        .balance_interval       = 1,                                    \
 }
 
-#ifdef CONFIG_X86_64
 extern int __node_distance(int, int);
 #define node_distance(a, b) __node_distance(a, b)
-#endif
 
 #else /* !CONFIG_NUMA */
 
index 83e2efd..15d9915 100644 (file)
@@ -51,6 +51,8 @@ extern int unsynchronized_tsc(void);
 extern int check_tsc_unstable(void);
 extern unsigned long native_calibrate_tsc(void);
 
+extern int tsc_clocksource_reliable;
+
 /*
  * Boot-time check whether the TSCs are synchronized across
  * all CPUs/cores:
index 36361bf..8be5f54 100644 (file)
@@ -462,7 +462,7 @@ struct __large_struct { unsigned long buf[100]; };
        barrier();
 
 #define uaccess_catch(err)                                             \
-       (err) |= current_thread_info()->uaccess_err;                    \
+       (err) |= (current_thread_info()->uaccess_err ? -EFAULT : 0);    \
        current_thread_info()->uaccess_err = prev_err;                  \
 } while (0)
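
A sketch of how the pair reads at a call site, via the get_user_try /
get_user_ex / get_user_catch wrappers built on it; demo_read() is invented:

    #include <linux/types.h>
    #include <asm/uaccess.h>

    static int demo_read(u32 __user *uaddr, u32 *val)
    {
            int err = 0;

            get_user_try {
                    get_user_ex(*val, uaddr); /* a fault jumps to the catch */
            } get_user_catch(err);

            return err;     /* with this fix: -EFAULT on fault, else 0 */
    }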
 
index 1971e65..1ac860a 100644 (file)
@@ -7,6 +7,7 @@
 struct mpc_bus;
 struct mpc_cpu;
 struct mpc_table;
+struct cpuinfo_x86;
 
 /**
  * struct x86_init_mpparse - platform specific mpparse ops
@@ -147,6 +148,7 @@ struct x86_init_ops {
  */
 struct x86_cpuinit_ops {
        void (*setup_percpu_clockev)(void);
+       void (*fixup_cpu_id)(struct cpuinfo_x86 *c, int node);
 };
 
 /**
@@ -186,5 +188,6 @@ extern struct x86_msi_ops x86_msi;
 
 extern void x86_init_noop(void);
 extern void x86_init_uint_noop(unsigned int unused);
+extern void x86_default_fixup_cpu_id(struct cpuinfo_x86 *c, int node);
 
 #endif
index 4558f0d..ce664f3 100644 (file)
@@ -219,6 +219,8 @@ static int __init
 acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
 {
        struct acpi_madt_local_x2apic *processor = NULL;
+       int apic_id;
+       u8 enabled;
 
        processor = (struct acpi_madt_local_x2apic *)header;
 
@@ -227,6 +229,8 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
 
        acpi_table_print_madt_entry(header);
 
+       apic_id = processor->local_apic_id;
+       enabled = processor->lapic_flags & ACPI_MADT_ENABLED;
 #ifdef CONFIG_X86_X2APIC
        /*
         * We need to register disabled CPU as well to permit
@@ -235,8 +239,10 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
         * to not preallocating memory for all NR_CPUS
         * when we use CPU hotplug.
         */
-       acpi_register_lapic(processor->local_apic_id,   /* APIC ID */
-                           processor->lapic_flags & ACPI_MADT_ENABLED);
+       if (!cpu_has_x2apic && (apic_id >= 0xff) && enabled)
+               printk(KERN_WARNING PREFIX "x2apic entry ignored\n");
+       else
+               acpi_register_lapic(apic_id, enabled);
 #else
        printk(KERN_WARNING PREFIX "x2apic entry ignored\n");
 #endif
index 4c39baa..013c181 100644 (file)
@@ -123,16 +123,14 @@ int amd_get_subcaches(int cpu)
 {
        struct pci_dev *link = node_to_amd_nb(amd_get_nb_id(cpu))->link;
        unsigned int mask;
-       int cuid = 0;
+       int cuid;
 
        if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
                return 0;
 
        pci_read_config_dword(link, 0x1d4, &mask);
 
-#ifdef CONFIG_SMP
        cuid = cpu_data(cpu).compute_unit_id;
-#endif
        return (mask >> (4 * cuid)) & 0xf;
 }
 
@@ -141,7 +139,7 @@ int amd_set_subcaches(int cpu, int mask)
        static unsigned int reset, ban;
        struct amd_northbridge *nb = node_to_amd_nb(amd_get_nb_id(cpu));
        unsigned int reg;
-       int cuid = 0;
+       int cuid;
 
        if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING) || mask > 0xf)
                return -EINVAL;
@@ -159,9 +157,7 @@ int amd_set_subcaches(int cpu, int mask)
                pci_write_config_dword(nb->misc, 0x1b8, reg & ~0x180000);
        }
 
-#ifdef CONFIG_SMP
        cuid = cpu_data(cpu).compute_unit_id;
-#endif
        mask <<= 4 * cuid;
        mask |= (0xf ^ (1 << cuid)) << 26;
 
index 3d2661c..6e76c19 100644 (file)
@@ -88,13 +88,13 @@ static u32 __init allocate_aperture(void)
         */
        addr = memblock_find_in_range(GART_MIN_ADDR, GART_MAX_ADDR,
                                      aper_size, aper_size);
-       if (addr == MEMBLOCK_ERROR || addr + aper_size > GART_MAX_ADDR) {
+       if (!addr || addr + aper_size > GART_MAX_ADDR) {
                printk(KERN_ERR
                        "Cannot allocate aperture memory hole (%lx,%uK)\n",
                                addr, aper_size>>10);
                return 0;
        }
-       memblock_x86_reserve_range(addr, addr + aper_size, "aperture64");
+       memblock_reserve(addr, aper_size);
        /*
         * Kmemleak should not scan this block as it may not be mapped via the
         * kernel direct mapping.
index 767fd04..0ae0323 100644 (file)
@@ -10,6 +10,7 @@ obj-$(CONFIG_SMP)             += ipi.o
 
 ifeq ($(CONFIG_X86_64),y)
 # APIC probe will depend on the listing order here
+obj-$(CONFIG_X86_NUMACHIP)     += apic_numachip.o
 obj-$(CONFIG_X86_UV)           += x2apic_uv_x.o
 obj-$(CONFIG_X86_X2APIC)       += x2apic_phys.o
 obj-$(CONFIG_X86_X2APIC)       += x2apic_cluster.o
index 2cd2d93..2eec05b 100644 (file)
@@ -146,16 +146,26 @@ __setup("apicpmtimer", setup_apicpmtimer);
 int x2apic_mode;
 #ifdef CONFIG_X86_X2APIC
 /* x2apic enabled before OS handover */
-static int x2apic_preenabled;
+int x2apic_preenabled;
+static int x2apic_disabled;
+static int nox2apic;
 static __init int setup_nox2apic(char *str)
 {
        if (x2apic_enabled()) {
-               pr_warning("Bios already enabled x2apic, "
-                          "can't enforce nox2apic");
-               return 0;
-       }
+               int apicid = native_apic_msr_read(APIC_ID);
+
+               if (apicid >= 255) {
+                       pr_warning("Apicid: %08x, cannot enforce nox2apic\n",
+                                  apicid);
+                       return 0;
+               }
+
+               pr_warning("x2apic already enabled. will disable it\n");
+       } else
+               setup_clear_cpu_cap(X86_FEATURE_X2APIC);
+
+       nox2apic = 1;
 
-       setup_clear_cpu_cap(X86_FEATURE_X2APIC);
        return 0;
 }
 early_param("nox2apic", setup_nox2apic);
@@ -250,6 +260,7 @@ u32 native_safe_apic_wait_icr_idle(void)
                send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
                if (!send_status)
                        break;
+               inc_irq_stat(icr_read_retry_count);
                udelay(100);
        } while (timeout++ < 1000);
 
@@ -1431,6 +1442,45 @@ void __init bsp_end_local_APIC_setup(void)
 }
 
 #ifdef CONFIG_X86_X2APIC
+/*
+ * Need to disable xapic and x2apic at the same time and then enable xapic mode
+ */
+static inline void __disable_x2apic(u64 msr)
+{
+       wrmsrl(MSR_IA32_APICBASE,
+              msr & ~(X2APIC_ENABLE | XAPIC_ENABLE));
+       wrmsrl(MSR_IA32_APICBASE, msr & ~X2APIC_ENABLE);
+}
+
+static __init void disable_x2apic(void)
+{
+       u64 msr;
+
+       if (!cpu_has_x2apic)
+               return;
+
+       rdmsrl(MSR_IA32_APICBASE, msr);
+       if (msr & X2APIC_ENABLE) {
+               u32 x2apic_id = read_apic_id();
+
+               if (x2apic_id >= 255)
+                       panic("Cannot disable x2apic, id: %08x\n", x2apic_id);
+
+               pr_info("Disabling x2apic\n");
+               __disable_x2apic(msr);
+
+               if (nox2apic) {
+                       clear_cpu_cap(&cpu_data(0), X86_FEATURE_X2APIC);
+                       setup_clear_cpu_cap(X86_FEATURE_X2APIC);
+               }
+
+               x2apic_disabled = 1;
+               x2apic_mode = 0;
+
+               register_lapic_address(mp_lapic_addr);
+       }
+}
+
 void check_x2apic(void)
 {
        if (x2apic_enabled()) {
@@ -1441,15 +1491,20 @@ void check_x2apic(void)
 
 void enable_x2apic(void)
 {
-       int msr, msr2;
+       u64 msr;
+
+       rdmsrl(MSR_IA32_APICBASE, msr);
+       if (x2apic_disabled) {
+               __disable_x2apic(msr);
+               return;
+       }
 
        if (!x2apic_mode)
                return;
 
-       rdmsr(MSR_IA32_APICBASE, msr, msr2);
        if (!(msr & X2APIC_ENABLE)) {
                printk_once(KERN_INFO "Enabling x2apic\n");
-               wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, msr2);
+               wrmsrl(MSR_IA32_APICBASE, msr | X2APIC_ENABLE);
        }
 }
 #endif /* CONFIG_X86_X2APIC */
@@ -1486,25 +1541,34 @@ void __init enable_IR_x2apic(void)
        ret = save_ioapic_entries();
        if (ret) {
                pr_info("Saving IO-APIC state failed: %d\n", ret);
-               goto out;
+               return;
        }
 
        local_irq_save(flags);
        legacy_pic->mask_all();
        mask_ioapic_entries();
 
+       if (x2apic_preenabled && nox2apic)
+               disable_x2apic();
+
        if (dmar_table_init_ret)
                ret = -1;
        else
                ret = enable_IR();
 
+       if (!x2apic_supported())
+               goto skip_x2apic;
+
        if (ret < 0) {
                /* IR is required if there is APIC ID > 255 even when running
                 * under KVM
                 */
                if (max_physical_apicid > 255 ||
-                   !hypervisor_x2apic_available())
-                       goto nox2apic;
+                   !hypervisor_x2apic_available()) {
+                       if (x2apic_preenabled)
+                               disable_x2apic();
+                       goto skip_x2apic;
+               }
                /*
                 * without IR all CPUs can be addressed by IOAPIC/MSI
                 * only in physical mode
@@ -1512,8 +1576,10 @@ void __init enable_IR_x2apic(void)
                x2apic_force_phys();
        }
 
-       if (ret == IRQ_REMAP_XAPIC_MODE)
-               goto nox2apic;
+       if (ret == IRQ_REMAP_XAPIC_MODE) {
+               pr_info("x2apic not enabled, IRQ remapping is in xapic mode\n");
+               goto skip_x2apic;
+       }
 
        x2apic_enabled = 1;
 
@@ -1523,22 +1589,11 @@ void __init enable_IR_x2apic(void)
                pr_info("Enabled x2apic\n");
        }
 
-nox2apic:
+skip_x2apic:
        if (ret < 0) /* IR enabling failed */
                restore_ioapic_entries();
        legacy_pic->restore_mask();
        local_irq_restore(flags);
-
-out:
-       if (x2apic_enabled || !x2apic_supported())
-               return;
-
-       if (x2apic_preenabled)
-               panic("x2apic: enabled by BIOS but kernel init failed.");
-       else if (ret == IRQ_REMAP_XAPIC_MODE)
-               pr_info("x2apic not enabled, IRQ remapping is in xapic mode\n");
-       else if (ret < 0)
-               pr_info("x2apic not enabled, IRQ remapping init failed\n");
 }
 
 #ifdef CONFIG_X86_64
index f7a41e4..8c3cdde 100644 (file)
@@ -62,7 +62,7 @@ static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask)
  * an APIC.  See e.g. "AP-388 82489DX User's Manual" (Intel
  * document number 292116).  So here it goes...
  */
-static void flat_init_apic_ldr(void)
+void flat_init_apic_ldr(void)
 {
        unsigned long val;
        unsigned long num, id;
@@ -171,9 +171,14 @@ static int flat_phys_pkg_id(int initial_apic_id, int index_msb)
        return initial_apic_id >> index_msb;
 }
 
+static int flat_probe(void)
+{
+       return 1;
+}
+
 static struct apic apic_flat =  {
        .name                           = "flat",
-       .probe                          = NULL,
+       .probe                          = flat_probe,
        .acpi_madt_oem_check            = flat_acpi_madt_oem_check,
        .apic_id_registered             = flat_apic_id_registered,
 
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
new file mode 100644 (file)
index 0000000..09d3d8c
--- /dev/null
@@ -0,0 +1,294 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Numascale NumaConnect-Specific APIC Code
+ *
+ * Copyright (C) 2011 Numascale AS. All rights reserved.
+ *
+ * Send feedback to <support@numascale.com>
+ *
+ */
+
+#include <linux/errno.h>
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/hardirq.h>
+#include <linux/delay.h>
+
+#include <asm/numachip/numachip_csr.h>
+#include <asm/smp.h>
+#include <asm/apic.h>
+#include <asm/ipi.h>
+#include <asm/apic_flat_64.h>
+
+static int numachip_system __read_mostly;
+
+static struct apic apic_numachip __read_mostly;
+
+static unsigned int get_apic_id(unsigned long x)
+{
+       unsigned long value;
+       unsigned int id;
+
+       rdmsrl(MSR_FAM10H_NODE_ID, value);
+       id = ((x >> 24) & 0xffU) | ((value << 2) & 0x3f00U);
+
+       return id;
+}
+
+static unsigned long set_apic_id(unsigned int id)
+{
+       unsigned long x;
+
+       x = ((id & 0xffU) << 24);
+       return x;
+}
+
+static unsigned int read_xapic_id(void)
+{
+       return get_apic_id(apic_read(APIC_ID));
+}
+
+static int numachip_apic_id_registered(void)
+{
+       return physid_isset(read_xapic_id(), phys_cpu_present_map);
+}
+
+static int numachip_phys_pkg_id(int initial_apic_id, int index_msb)
+{
+       return initial_apic_id >> index_msb;
+}
+
+static const struct cpumask *numachip_target_cpus(void)
+{
+       return cpu_online_mask;
+}
+
+static void numachip_vector_allocation_domain(int cpu, struct cpumask *retmask)
+{
+       cpumask_clear(retmask);
+       cpumask_set_cpu(cpu, retmask);
+}
+
+static int __cpuinit numachip_wakeup_secondary(int phys_apicid, unsigned long start_rip)
+{
+       union numachip_csr_g3_ext_irq_gen int_gen;
+
+       int_gen.s._destination_apic_id = phys_apicid;
+       int_gen.s._vector = 0;
+       int_gen.s._msgtype = APIC_DM_INIT >> 8;
+       int_gen.s._index = 0;
+
+       write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v);
+
+       int_gen.s._msgtype = APIC_DM_STARTUP >> 8;
+       int_gen.s._vector = start_rip >> 12;
+
+       write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v);
+
+       atomic_set(&init_deasserted, 1);
+       return 0;
+}
+
+static void numachip_send_IPI_one(int cpu, int vector)
+{
+       union numachip_csr_g3_ext_irq_gen int_gen;
+       int apicid = per_cpu(x86_cpu_to_apicid, cpu);
+
+       int_gen.s._destination_apic_id = apicid;
+       int_gen.s._vector = vector;
+       int_gen.s._msgtype = (vector == NMI_VECTOR ? APIC_DM_NMI : APIC_DM_FIXED) >> 8;
+       int_gen.s._index = 0;
+
+       write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v);
+}
+
+static void numachip_send_IPI_mask(const struct cpumask *mask, int vector)
+{
+       unsigned int cpu;
+
+       for_each_cpu(cpu, mask)
+               numachip_send_IPI_one(cpu, vector);
+}
+
+static void numachip_send_IPI_mask_allbutself(const struct cpumask *mask,
+                                               int vector)
+{
+       unsigned int this_cpu = smp_processor_id();
+       unsigned int cpu;
+
+       for_each_cpu(cpu, mask) {
+               if (cpu != this_cpu)
+                       numachip_send_IPI_one(cpu, vector);
+       }
+}
+
+static void numachip_send_IPI_allbutself(int vector)
+{
+       unsigned int this_cpu = smp_processor_id();
+       unsigned int cpu;
+
+       for_each_online_cpu(cpu) {
+               if (cpu != this_cpu)
+                       numachip_send_IPI_one(cpu, vector);
+       }
+}
+
+static void numachip_send_IPI_all(int vector)
+{
+       numachip_send_IPI_mask(cpu_online_mask, vector);
+}
+
+static void numachip_send_IPI_self(int vector)
+{
+       __default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
+}
+
+static unsigned int numachip_cpu_mask_to_apicid(const struct cpumask *cpumask)
+{
+       int cpu;
+
+       /*
+        * We're using fixed IRQ delivery, can only return one phys APIC ID.
+        * May as well be the first.
+        */
+       cpu = cpumask_first(cpumask);
+       if (likely((unsigned)cpu < nr_cpu_ids))
+               return per_cpu(x86_cpu_to_apicid, cpu);
+
+       return BAD_APICID;
+}
+
+static unsigned int
+numachip_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+                               const struct cpumask *andmask)
+{
+       int cpu;
+
+       /*
+        * We're using fixed IRQ delivery, can only return one phys APIC ID.
+        * May as well be the first.
+        */
+       for_each_cpu_and(cpu, cpumask, andmask) {
+               if (cpumask_test_cpu(cpu, cpu_online_mask))
+                       break;
+       }
+       return per_cpu(x86_cpu_to_apicid, cpu);
+}
+
+static int __init numachip_probe(void)
+{
+       return apic == &apic_numachip;
+}
+
+static void __init map_csrs(void)
+{
+       printk(KERN_INFO "NumaChip: Mapping local CSR space (%016llx - %016llx)\n",
+               NUMACHIP_LCSR_BASE, NUMACHIP_LCSR_BASE + NUMACHIP_LCSR_SIZE - 1);
+       init_extra_mapping_uc(NUMACHIP_LCSR_BASE, NUMACHIP_LCSR_SIZE);
+
+       printk(KERN_INFO "NumaChip: Mapping global CSR space (%016llx - %016llx)\n",
+               NUMACHIP_GCSR_BASE, NUMACHIP_GCSR_BASE + NUMACHIP_GCSR_SIZE - 1);
+       init_extra_mapping_uc(NUMACHIP_GCSR_BASE, NUMACHIP_GCSR_SIZE);
+}
+
+static void fixup_cpu_id(struct cpuinfo_x86 *c, int node)
+{
+       c->phys_proc_id = node;
+       per_cpu(cpu_llc_id, smp_processor_id()) = node;
+}
+
+static int __init numachip_system_init(void)
+{
+       unsigned int val;
+
+       if (!numachip_system)
+               return 0;
+
+       x86_cpuinit.fixup_cpu_id = fixup_cpu_id;
+
+       map_csrs();
+
+       val = read_lcsr(CSR_G0_NODE_IDS);
+       printk(KERN_INFO "NumaChip: Local NodeID = %08x\n", val);
+
+       return 0;
+}
+early_initcall(numachip_system_init);
+
+static int numachip_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+       if (!strncmp(oem_id, "NUMASC", 6)) {
+               numachip_system = 1;
+               return 1;
+       }
+
+       return 0;
+}
+
+static struct apic apic_numachip __refconst = {
+
+       .name                           = "NumaConnect system",
+       .probe                          = numachip_probe,
+       .acpi_madt_oem_check            = numachip_acpi_madt_oem_check,
+       .apic_id_registered             = numachip_apic_id_registered,
+
+       .irq_delivery_mode              = dest_Fixed,
+       .irq_dest_mode                  = 0, /* physical */
+
+       .target_cpus                    = numachip_target_cpus,
+       .disable_esr                    = 0,
+       .dest_logical                   = 0,
+       .check_apicid_used              = NULL,
+       .check_apicid_present           = NULL,
+
+       .vector_allocation_domain       = numachip_vector_allocation_domain,
+       .init_apic_ldr                  = flat_init_apic_ldr,
+
+       .ioapic_phys_id_map             = NULL,
+       .setup_apic_routing             = NULL,
+       .multi_timer_check              = NULL,
+       .cpu_present_to_apicid          = default_cpu_present_to_apicid,
+       .apicid_to_cpu_present          = NULL,
+       .setup_portio_remap             = NULL,
+       .check_phys_apicid_present      = default_check_phys_apicid_present,
+       .enable_apic_mode               = NULL,
+       .phys_pkg_id                    = numachip_phys_pkg_id,
+       .mps_oem_check                  = NULL,
+
+       .get_apic_id                    = get_apic_id,
+       .set_apic_id                    = set_apic_id,
+       .apic_id_mask                   = 0xffU << 24,
+
+       .cpu_mask_to_apicid             = numachip_cpu_mask_to_apicid,
+       .cpu_mask_to_apicid_and         = numachip_cpu_mask_to_apicid_and,
+
+       .send_IPI_mask                  = numachip_send_IPI_mask,
+       .send_IPI_mask_allbutself       = numachip_send_IPI_mask_allbutself,
+       .send_IPI_allbutself            = numachip_send_IPI_allbutself,
+       .send_IPI_all                   = numachip_send_IPI_all,
+       .send_IPI_self                  = numachip_send_IPI_self,
+
+       .wakeup_secondary_cpu           = numachip_wakeup_secondary,
+       .trampoline_phys_low            = DEFAULT_TRAMPOLINE_PHYS_LOW,
+       .trampoline_phys_high           = DEFAULT_TRAMPOLINE_PHYS_HIGH,
+       .wait_for_init_deassert         = NULL,
+       .smp_callin_clear_local_apic    = NULL,
+       .inquire_remote_apic            = NULL, /* REMRD not supported */
+
+       .read                           = native_apic_mem_read,
+       .write                          = native_apic_mem_write,
+       .icr_read                       = native_apic_icr_read,
+       .icr_write                      = native_apic_icr_write,
+       .wait_icr_idle                  = native_apic_wait_icr_idle,
+       .safe_wait_icr_idle             = native_safe_apic_wait_icr_idle,
+};
+apic_driver(apic_numachip);
+
index 8980555..fb07275 100644 (file)
@@ -2948,6 +2948,10 @@ static inline void __init check_timer(void)
        }
        local_irq_disable();
        apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
+       if (x2apic_preenabled)
+               apic_printk(APIC_QUIET, KERN_INFO
+                           "Perhaps problem with the pre-enabled x2apic mode\n"
+                           "Try booting with x2apic and interrupt-remapping disabled in the bios.\n");
        panic("IO-APIC + timer doesn't work!  Boot with apic=debug and send a "
                "report.  Then try booting with the 'noapic' option.\n");
 out:
index 452932d..5da1269 100644 (file)
@@ -62,7 +62,8 @@ early_param("memory_corruption_check_size", set_corruption_check_size);
 
 void __init setup_bios_corruption_check(void)
 {
-       u64 addr = PAGE_SIZE;   /* assume first page is reserved anyway */
+       phys_addr_t start, end;
+       u64 i;
 
        if (memory_corruption_check == -1) {
                memory_corruption_check =
@@ -82,28 +83,23 @@ void __init setup_bios_corruption_check(void)
 
        corruption_check_size = round_up(corruption_check_size, PAGE_SIZE);
 
-       while (addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) {
-               u64 size;
-               addr = memblock_x86_find_in_range_size(addr, &size, PAGE_SIZE);
+       for_each_free_mem_range(i, MAX_NUMNODES, &start, &end, NULL) {
+               start = clamp_t(phys_addr_t, round_up(start, PAGE_SIZE),
+                               PAGE_SIZE, corruption_check_size);
+               end = clamp_t(phys_addr_t, round_down(end, PAGE_SIZE),
+                             PAGE_SIZE, corruption_check_size);
+               if (start >= end)
+                       continue;
 
-               if (addr == MEMBLOCK_ERROR)
-                       break;
-
-               if (addr >= corruption_check_size)
-                       break;
-
-               if ((addr + size) > corruption_check_size)
-                       size = corruption_check_size - addr;
-
-               memblock_x86_reserve_range(addr, addr + size, "SCAN RAM");
-               scan_areas[num_scan_areas].addr = addr;
-               scan_areas[num_scan_areas].size = size;
-               num_scan_areas++;
+               memblock_reserve(start, end - start);
+               scan_areas[num_scan_areas].addr = start;
+               scan_areas[num_scan_areas].size = end - start;
 
                /* Assume we've already mapped this early memory */
-               memset(__va(addr), 0, size);
+               memset(__va(start), 0, end - start);
 
-               addr += size;
+               if (++num_scan_areas >= MAX_SCAN_AREAS)
+                       break;
        }
 
        if (num_scan_areas)
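
The clamp_t() intersection pattern above, in isolation (a userspace sketch
with invented numbers): round the range inward to page boundaries, clamp it
into [PAGE_SIZE, limit), and skip it if it collapses.

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SZ 4096ULL

    static uint64_t clamp(uint64_t v, uint64_t lo, uint64_t hi)
    {
            return v < lo ? lo : (v > hi ? hi : v);
    }

    int main(void)
    {
            uint64_t limit = 64 * 1024;             /* corruption_check_size */
            uint64_t start = 1000, end = 100000;    /* one free range */

            start = clamp((start + PAGE_SZ - 1) & ~(PAGE_SZ - 1),
                          PAGE_SZ, limit);
            end = clamp(end & ~(PAGE_SZ - 1), PAGE_SZ, limit);

            if (start < end)        /* prints: scan [4096, 65536) */
                    printf("scan [%llu, %llu)\n", (unsigned long long)start,
                           (unsigned long long)end);
            return 0;
    }
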
index 0bab2b1..f4773f4 100644 (file)
@@ -148,7 +148,6 @@ static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c)
 
 static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c)
 {
-#ifdef CONFIG_SMP
        /* calling is from identify_secondary_cpu() ? */
        if (!c->cpu_index)
                return;
@@ -192,7 +191,6 @@ static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c)
 
 valid_k7:
        ;
-#endif
 }
 
 static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
@@ -353,6 +351,13 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
        if (node == NUMA_NO_NODE)
                node = per_cpu(cpu_llc_id, cpu);
 
+       /*
+        * If core numbers are inconsistent, it's likely a multi-fabric platform,
+        * so invoke the platform-specific handler
+        */
+       if (c->phys_proc_id != node)
+               x86_cpuinit.fixup_cpu_id(c, node);
+
        if (!node_online(node)) {
                /*
                 * Two possibilities here:
index e58d978..159103c 100644 (file)
@@ -278,7 +278,7 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c)
        }
 #ifdef CONFIG_X86_32
        /* Cyrix III family needs CX8 & PGE explicitly enabled. */
-       if (c->x86_model >= 6 && c->x86_model <= 9) {
+       if (c->x86_model >= 6 && c->x86_model <= 13) {
                rdmsr(MSR_VIA_FCR, lo, hi);
                lo |= (1<<1 | 1<<7);
                wrmsr(MSR_VIA_FCR, lo, hi);
index aa003b1..850f296 100644 (file)
@@ -676,9 +676,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
        if (this_cpu->c_early_init)
                this_cpu->c_early_init(c);
 
-#ifdef CONFIG_SMP
        c->cpu_index = 0;
-#endif
        filter_cpuid_features(c, false);
 
        setup_smep(c);
@@ -764,10 +762,7 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
                c->apicid = c->initial_apicid;
 # endif
 #endif
-
-#ifdef CONFIG_X86_HT
                c->phys_proc_id = c->initial_apicid;
-#endif
        }
 
        setup_smep(c);
@@ -1141,6 +1136,15 @@ static void dbg_restore_debug_regs(void)
 #endif /* ! CONFIG_KGDB */
 
 /*
+ * Prints an error when the NUMA core number and the configured core number
+ * mismatch and the platform didn't override this to fix it up
+ */
+void __cpuinit x86_default_fixup_cpu_id(struct cpuinfo_x86 *c, int node)
+{
+       pr_err("NUMA core number %d differs from configured core number %d\n", node, c->phys_proc_id);
+}
+
+/*
  * cpu_init() initializes state that is per-CPU. Some data is already
  * initialized (naturally) in the bootstrap process, such as the GDT
  * and IDT. We reload them nevertheless, this function acts as a
index 1b22dcc..8bacc78 100644 (file)
@@ -1,5 +1,4 @@
 #ifndef ARCH_X86_CPU_H
-
 #define ARCH_X86_CPU_H
 
 struct cpu_model_info {
@@ -35,6 +34,4 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[],
 
 extern void get_cpu_cap(struct cpuinfo_x86 *c);
 extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
-extern void get_cpu_cap(struct cpuinfo_x86 *c);
-
-#endif
+#endif /* ARCH_X86_CPU_H */
index 5231312..3e6ff6c 100644 (file)
@@ -181,7 +181,6 @@ static void __cpuinit trap_init_f00f_bug(void)
 
 static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c)
 {
-#ifdef CONFIG_SMP
        /* calling is from identify_secondary_cpu() ? */
        if (!c->cpu_index)
                return;
@@ -198,7 +197,6 @@ static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c)
                WARN_ONCE(1, "WARNING: SMP operation may be unreliable"
                                    "with B stepping processors.\n");
        }
-#endif
 }
 
 static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
index 319882e..fc4beb3 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/kernel.h>
 #include <linux/string.h>
 #include <linux/fs.h>
+#include <linux/preempt.h>
 #include <linux/smp.h>
 #include <linux/notifier.h>
 #include <linux/kdebug.h>
@@ -92,6 +93,18 @@ static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs)
        return NMI_HANDLED;
 }
 
+static void mce_irq_ipi(void *info)
+{
+       int cpu = smp_processor_id();
+       struct mce *m = &__get_cpu_var(injectm);
+
+       if (cpumask_test_cpu(cpu, mce_inject_cpumask) &&
+                       m->inject_flags & MCJ_EXCEPTION) {
+               cpumask_clear_cpu(cpu, mce_inject_cpumask);
+               raise_exception(m, NULL);
+       }
+}
+
 /* Inject mce on current CPU */
 static int raise_local(void)
 {
@@ -139,9 +152,10 @@ static void raise_mce(struct mce *m)
                return;
 
 #ifdef CONFIG_X86_LOCAL_APIC
-       if (m->inject_flags & MCJ_NMI_BROADCAST) {
+       if (m->inject_flags & (MCJ_IRQ_BRAODCAST | MCJ_NMI_BROADCAST)) {
                unsigned long start;
                int cpu;
+
                get_online_cpus();
                cpumask_copy(mce_inject_cpumask, cpu_online_mask);
                cpumask_clear_cpu(get_cpu(), mce_inject_cpumask);
@@ -151,13 +165,25 @@ static void raise_mce(struct mce *m)
                            MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM)
                                cpumask_clear_cpu(cpu, mce_inject_cpumask);
                }
-               if (!cpumask_empty(mce_inject_cpumask))
-                       apic->send_IPI_mask(mce_inject_cpumask, NMI_VECTOR);
+               if (!cpumask_empty(mce_inject_cpumask)) {
+                       if (m->inject_flags & MCJ_IRQ_BRAODCAST) {
+                               /*
+                                * don't wait: mce_irq_ipi() needs to stay in
+                                * sync with the raise_local() that follows
+                                */
+                               preempt_disable();
+                               smp_call_function_many(mce_inject_cpumask,
+                                       mce_irq_ipi, NULL, 0);
+                               preempt_enable();
+                       } else if (m->inject_flags & MCJ_NMI_BROADCAST)
+                               apic->send_IPI_mask(mce_inject_cpumask,
+                                               NMI_VECTOR);
+               }
                start = jiffies;
                while (!cpumask_empty(mce_inject_cpumask)) {
                        if (!time_before(jiffies, start + 2*HZ)) {
                                printk(KERN_ERR
-                               "Timeout waiting for mce inject NMI %lx\n",
+                               "Timeout waiting for mce inject %lx\n",
                                        *cpumask_bits(mce_inject_cpumask));
                                break;
                        }
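
For orientation, the handshake behind the new MCJ_IRQ_BRAODCAST path distills
to the following (an illustrative sketch of the code above, not a drop-in):

	/*
	 *  injecting CPU                        each targeted CPU
	 *  -------------                        -----------------
	 *  fill mce_inject_cpumask
	 *  smp_call_function_many(..., 0)  -->  mce_irq_ipi():
	 *  spin while the mask is non-empty,      clear own bit (the ack)
	 *  give up after 2*HZ                     raise_exception()
	 *  raise_local() on itself
	 */
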
index 2af127d..cbe82b5 100644
@@ -95,13 +95,6 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait);
 static DEFINE_PER_CPU(struct mce, mces_seen);
 static int                     cpu_missing;
 
-/*
- * CPU/chipset specific EDAC code can register a notifier call here to print
- * MCE errors in a human-readable form.
- */
-ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
-EXPORT_SYMBOL_GPL(x86_mce_decoder_chain);
-
 /* MCA banks polled by the period polling timer for corrected events */
 DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
        [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
@@ -109,6 +102,12 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
 
 static DEFINE_PER_CPU(struct work_struct, mce_work);
 
+/*
+ * CPU/chipset specific EDAC code can register a notifier call here to print
+ * MCE errors in a human-readable form.
+ */
+ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
+
 /* Do initial initialization of a struct mce */
 void mce_setup(struct mce *m)
 {
@@ -119,9 +118,7 @@ void mce_setup(struct mce *m)
        m->time = get_seconds();
        m->cpuvendor = boot_cpu_data.x86_vendor;
        m->cpuid = cpuid_eax(1);
-#ifdef CONFIG_SMP
        m->socketid = cpu_data(m->extcpu).phys_proc_id;
-#endif
        m->apicid = cpu_data(m->extcpu).initial_apicid;
        rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap);
 }
@@ -190,6 +187,57 @@ void mce_log(struct mce *mce)
        set_bit(0, &mce_need_notify);
 }
 
+static void drain_mcelog_buffer(void)
+{
+       unsigned int next, i, prev = 0;
+
+       next = rcu_dereference_check_mce(mcelog.next);
+
+       do {
+               struct mce *m;
+
+               /* drain what was logged during boot */
+               for (i = prev; i < next; i++) {
+                       unsigned long start = jiffies;
+                       unsigned retries = 1;
+
+                       m = &mcelog.entry[i];
+
+                       while (!m->finished) {
+                               if (time_after_eq(jiffies, start + 2*retries))
+                                       retries++;
+
+                               cpu_relax();
+
+                               if (!m->finished && retries >= 4) {
+                                       pr_err("MCE: skipping error being logged currently!\n");
+                                       break;
+                               }
+                       }
+                       smp_rmb();
+                       atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
+               }
+
+               memset(mcelog.entry + prev, 0, (next - prev) * sizeof(*m));
+               prev = next;
+               next = cmpxchg(&mcelog.next, prev, 0);
+       } while (next != prev);
+}
+
+
+void mce_register_decode_chain(struct notifier_block *nb)
+{
+       atomic_notifier_chain_register(&x86_mce_decoder_chain, nb);
+       drain_mcelog_buffer();
+}
+EXPORT_SYMBOL_GPL(mce_register_decode_chain);
+
+void mce_unregister_decode_chain(struct notifier_block *nb)
+{
+       atomic_notifier_chain_unregister(&x86_mce_decoder_chain, nb);
+}
+EXPORT_SYMBOL_GPL(mce_unregister_decode_chain);
+
 static void print_mce(struct mce *m)
 {
        int ret = 0;
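
With the notifier head now private to mce.c, EDAC and other decoders must go
through the new wrappers. A minimal sketch of a consumer; my_mce_decode and
my_mce_dec are hypothetical, the struct mce fields are real:

	/* Hypothetical decoder: print a one-line summary per logged MCE. */
	static int my_mce_decode(struct notifier_block *nb,
				 unsigned long val, void *data)
	{
		struct mce *m = data;

		pr_info("MCE on CPU %d, bank %d, status %llx\n",
			m->extcpu, m->bank, m->status);
		return NOTIFY_OK;
	}

	static struct notifier_block my_mce_dec = {
		.notifier_call	= my_mce_decode,
	};

	/* module init/exit: */
	mce_register_decode_chain(&my_mce_dec);	/* also drains boot-time records */
	mce_unregister_decode_chain(&my_mce_dec);
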
index f547421..1d76872 100644
@@ -64,11 +64,9 @@ struct threshold_bank {
 };
 static DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks);
 
-#ifdef CONFIG_SMP
 static unsigned char shared_bank[NR_BANKS] = {
        0, 0, 0, 0, 1
 };
-#endif
 
 static DEFINE_PER_CPU(unsigned char, bank_map);        /* see which banks are on */
 
@@ -202,10 +200,9 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
 
                        if (!block)
                                per_cpu(bank_map, cpu) |= (1 << bank);
-#ifdef CONFIG_SMP
                        if (shared_bank[bank] && c->cpu_core_id)
                                break;
-#endif
+
                        offset = setup_APIC_mce(offset,
                                                (high & MASK_LVTOFF_HI) >> 20);
 
@@ -531,7 +528,6 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 
        sprintf(name, "threshold_bank%i", bank);
 
-#ifdef CONFIG_SMP
        if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) {   /* symlink */
                i = cpumask_first(cpu_llc_shared_mask(cpu));
 
@@ -558,7 +554,6 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 
                goto out;
        }
-#endif
 
        b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL);
        if (!b) {
index ce21561..39c6089 100644
@@ -323,17 +323,6 @@ device_initcall(thermal_throttle_init_device);
 
 #endif /* CONFIG_SYSFS */
 
-/*
- * Set up the most two significant bit to notify mce log that this thermal
- * event type.
- * This is a temp solution. May be changed in the future with mce log
- * infrasture.
- */
-#define CORE_THROTTLED         (0)
-#define CORE_POWER_LIMIT       ((__u64)1 << 62)
-#define PACKAGE_THROTTLED      ((__u64)2 << 62)
-#define PACKAGE_POWER_LIMIT    ((__u64)3 << 62)
-
 static void notify_thresholds(__u64 msr_val)
 {
        /* check whether the interrupt handler is defined;
@@ -363,27 +352,23 @@ static void intel_thermal_interrupt(void)
        if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT,
                                THERMAL_THROTTLING_EVENT,
                                CORE_LEVEL) != 0)
-               mce_log_therm_throt_event(CORE_THROTTLED | msr_val);
+               mce_log_therm_throt_event(msr_val);
 
        if (this_cpu_has(X86_FEATURE_PLN))
-               if (therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT,
+               therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT,
                                        POWER_LIMIT_EVENT,
-                                       CORE_LEVEL) != 0)
-                       mce_log_therm_throt_event(CORE_POWER_LIMIT | msr_val);
+                                       CORE_LEVEL);
 
        if (this_cpu_has(X86_FEATURE_PTS)) {
                rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
-               if (therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT,
+               therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT,
                                        THERMAL_THROTTLING_EVENT,
-                                       PACKAGE_LEVEL) != 0)
-                       mce_log_therm_throt_event(PACKAGE_THROTTLED | msr_val);
+                                       PACKAGE_LEVEL);
                if (this_cpu_has(X86_FEATURE_PLN))
-                       if (therm_throt_process(msr_val &
+                       therm_throt_process(msr_val &
                                        PACKAGE_THERM_STATUS_POWER_LIMIT,
                                        POWER_LIMIT_EVENT,
-                                       PACKAGE_LEVEL) != 0)
-                               mce_log_therm_throt_event(PACKAGE_POWER_LIMIT
-                                                         | msr_val);
+                                       PACKAGE_LEVEL);
        }
 }
 
index 2bda212..5adce10 100644
@@ -484,18 +484,195 @@ static inline int is_x86_event(struct perf_event *event)
        return event->pmu == &pmu;
 }
 
+/*
+ * Event scheduler state:
+ *
+ * Assign events by iterating over all events and counters, starting
+ * with the events of lowest weight. Keep the current iterator
+ * state in struct sched_state.
+ */
+struct sched_state {
+       int     weight;
+       int     event;          /* event index */
+       int     counter;        /* counter index */
+       int     unassigned;     /* number of events left to assign */
+       unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+};
+
+/* Total max is X86_PMC_IDX_MAX, but we are O(n!) limited */
+#define        SCHED_STATES_MAX        2
+
+struct perf_sched {
+       int                     max_weight;
+       int                     max_events;
+       struct event_constraint **constraints;
+       struct sched_state      state;
+       int                     saved_states;
+       struct sched_state      saved[SCHED_STATES_MAX];
+};
+
+/*
+ * Initialize the iterator that runs through all events and counters.
+ */
+static void perf_sched_init(struct perf_sched *sched, struct event_constraint **c,
+                           int num, int wmin, int wmax)
+{
+       int idx;
+
+       memset(sched, 0, sizeof(*sched));
+       sched->max_events       = num;
+       sched->max_weight       = wmax;
+       sched->constraints      = c;
+
+       for (idx = 0; idx < num; idx++) {
+               if (c[idx]->weight == wmin)
+                       break;
+       }
+
+       sched->state.event      = idx;          /* start with min weight */
+       sched->state.weight     = wmin;
+       sched->state.unassigned = num;
+}
+
+static void perf_sched_save_state(struct perf_sched *sched)
+{
+       if (WARN_ON_ONCE(sched->saved_states >= SCHED_STATES_MAX))
+               return;
+
+       sched->saved[sched->saved_states] = sched->state;
+       sched->saved_states++;
+}
+
+static bool perf_sched_restore_state(struct perf_sched *sched)
+{
+       if (!sched->saved_states)
+               return false;
+
+       sched->saved_states--;
+       sched->state = sched->saved[sched->saved_states];
+
+       /* continue with next counter: */
+       clear_bit(sched->state.counter++, sched->state.used);
+
+       return true;
+}
+
+/*
+ * Select a counter for the current event to schedule. Return true on
+ * success.
+ */
+static bool __perf_sched_find_counter(struct perf_sched *sched)
+{
+       struct event_constraint *c;
+       int idx;
+
+       if (!sched->state.unassigned)
+               return false;
+
+       if (sched->state.event >= sched->max_events)
+               return false;
+
+       c = sched->constraints[sched->state.event];
+
+       /* Prefer fixed purpose counters */
+       if (x86_pmu.num_counters_fixed) {
+               idx = X86_PMC_IDX_FIXED;
+               for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_MAX) {
+                       if (!__test_and_set_bit(idx, sched->state.used))
+                               goto done;
+               }
+       }
+       /* Grab the first unused counter starting with idx */
+       idx = sched->state.counter;
+       for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_FIXED) {
+               if (!__test_and_set_bit(idx, sched->state.used))
+                       goto done;
+       }
+
+       return false;
+
+done:
+       sched->state.counter = idx;
+
+       if (c->overlap)
+               perf_sched_save_state(sched);
+
+       return true;
+}
+
+static bool perf_sched_find_counter(struct perf_sched *sched)
+{
+       while (!__perf_sched_find_counter(sched)) {
+               if (!perf_sched_restore_state(sched))
+                       return false;
+       }
+
+       return true;
+}
+
+/*
+ * Go through all unassigned events and find the next one to schedule.
+ * Take events with the least weight first. Return true on success.
+ */
+static bool perf_sched_next_event(struct perf_sched *sched)
+{
+       struct event_constraint *c;
+
+       if (!sched->state.unassigned || !--sched->state.unassigned)
+               return false;
+
+       do {
+               /* next event */
+               sched->state.event++;
+               if (sched->state.event >= sched->max_events) {
+                       /* next weight */
+                       sched->state.event = 0;
+                       sched->state.weight++;
+                       if (sched->state.weight > sched->max_weight)
+                               return false;
+               }
+               c = sched->constraints[sched->state.event];
+       } while (c->weight != sched->state.weight);
+
+       sched->state.counter = 0;       /* start with first counter */
+
+       return true;
+}
+
+/*
+ * Assign a counter for each event.
+ */
+static int perf_assign_events(struct event_constraint **constraints, int n,
+                             int wmin, int wmax, int *assign)
+{
+       struct perf_sched sched;
+
+       perf_sched_init(&sched, constraints, n, wmin, wmax);
+
+       do {
+               if (!perf_sched_find_counter(&sched))
+                       break;  /* failed */
+               if (assign)
+                       assign[sched.state.event] = sched.state.counter;
+       } while (perf_sched_next_event(&sched));
+
+       return sched.state.unassigned;
+}
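
To make the pass structure concrete, here is a standalone userspace sketch of
the weight-ordered greedy walk the perf_sched_* code above performs, minus the
overlap backtracking; the three masks are hypothetical (they match the
EVENT_CONSTRAINT_OVERLAP example later in this merge):

	/* cc -o sched sched.c && ./sched */
	#include <stdio.h>

	int main(void)
	{
		unsigned long idxmsk[3] = { 0x09, 0x07, 0x38 };	/* allowed counters */
		int weight[3] = { 2, 3, 3 };	/* popcount of each mask */
		int assign[3] = { -1, -1, -1 };
		unsigned long used = 0;
		int w, e, c;

		for (w = 1; w <= 3; w++)	/* lowest weight first */
			for (e = 0; e < 3; e++) {
				if (weight[e] != w)
					continue;
				/* grab the first free counter the mask allows */
				for (c = 0; c < 6; c++)
					if ((idxmsk[e] >> c & 1) && !(used >> c & 1)) {
						used |= 1UL << c;
						assign[e] = c;
						break;
					}
			}

		for (e = 0; e < 3; e++)	/* prints counters 0, 1 and 3 */
			printf("event %d -> counter %d\n", e, assign[e]);
		return 0;
	}
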
+
 int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 {
        struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
        unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
-       int i, j, w, wmax, num = 0;
+       int i, wmin, wmax, num = 0;
        struct hw_perf_event *hwc;
 
        bitmap_zero(used_mask, X86_PMC_IDX_MAX);
 
-       for (i = 0; i < n; i++) {
+       for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
                c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
                constraints[i] = c;
+               wmin = min(wmin, c->weight);
+               wmax = max(wmax, c->weight);
        }
 
        /*
@@ -521,60 +698,12 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
                if (assign)
                        assign[i] = hwc->idx;
        }
-       if (i == n)
-               goto done;
-
-       /*
-        * begin slow path
-        */
-
-       bitmap_zero(used_mask, X86_PMC_IDX_MAX);
 
-       /*
-        * weight = number of possible counters
-        *
-        * 1    = most constrained, only works on one counter
-        * wmax = least constrained, works on any counter
-        *
-        * assign events to counters starting with most
-        * constrained events.
-        */
-       wmax = x86_pmu.num_counters;
+       /* slow path */
+       if (i != n)
+               num = perf_assign_events(constraints, n, wmin, wmax, assign);
 
        /*
-        * when fixed event counters are present,
-        * wmax is incremented by 1 to account
-        * for one more choice
-        */
-       if (x86_pmu.num_counters_fixed)
-               wmax++;
-
-       for (w = 1, num = n; num && w <= wmax; w++) {
-               /* for each event */
-               for (i = 0; num && i < n; i++) {
-                       c = constraints[i];
-                       hwc = &cpuc->event_list[i]->hw;
-
-                       if (c->weight != w)
-                               continue;
-
-                       for_each_set_bit(j, c->idxmsk, X86_PMC_IDX_MAX) {
-                               if (!test_bit(j, used_mask))
-                                       break;
-                       }
-
-                       if (j == X86_PMC_IDX_MAX)
-                               break;
-
-                       __set_bit(j, used_mask);
-
-                       if (assign)
-                               assign[i] = j;
-                       num--;
-               }
-       }
-done:
-       /*
         * scheduling failed or is just a simulation,
         * free resources if necessary
         */
@@ -1119,6 +1248,7 @@ static void __init pmu_check_apic(void)
 
 static int __init init_hw_perf_events(void)
 {
+       struct x86_pmu_quirk *quirk;
        struct event_constraint *c;
        int err;
 
@@ -1147,8 +1277,8 @@ static int __init init_hw_perf_events(void)
 
        pr_cont("%s PMU driver.\n", x86_pmu.name);
 
-       if (x86_pmu.quirks)
-               x86_pmu.quirks();
+       for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next)
+               quirk->func();
 
        if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
                WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
@@ -1171,12 +1301,18 @@ static int __init init_hw_perf_events(void)
 
        unconstrained = (struct event_constraint)
                __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
-                                  0, x86_pmu.num_counters);
+                                  0, x86_pmu.num_counters, 0);
 
        if (x86_pmu.event_constraints) {
+               /*
+                * An event on fixed counter 2 (REF_CYCLES) works only on
+                * that counter, so do not extend its mask to the generic counters
+                */
                for_each_event_constraint(c, x86_pmu.event_constraints) {
-                       if (c->cmask != X86_RAW_EVENT_MASK)
+                       if (c->cmask != X86_RAW_EVENT_MASK
+                           || c->idxmsk64 == X86_PMC_MSK_FIXED_REF_CYCLES) {
                                continue;
+                       }
 
                        c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
                        c->weight += x86_pmu.num_counters;
@@ -1566,3 +1702,15 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
 
        return misc;
 }
+
+void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
+{
+       cap->version            = x86_pmu.version;
+       cap->num_counters_gp    = x86_pmu.num_counters;
+       cap->num_counters_fixed = x86_pmu.num_counters_fixed;
+       cap->bit_width_gp       = x86_pmu.cntval_bits;
+       cap->bit_width_fixed    = x86_pmu.cntval_bits;
+       cap->events_mask        = (unsigned int)x86_pmu.events_maskl;
+       cap->events_mask_len    = x86_pmu.events_mask_len;
+}
+EXPORT_SYMBOL_GPL(perf_get_x86_pmu_capability);
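
The new export is meant for in-kernel users outside perf itself (a KVM-style
virtual PMU is the natural consumer). A hypothetical caller, using only the
fields filled in above:

	/* Hypothetical consumer: size a guest PMU from the host's limits. */
	static void probe_host_pmu(void)
	{
		struct x86_pmu_capability cap;

		perf_get_x86_pmu_capability(&cap);
		pr_info("PMU v%d: %d GP counters (%d bit), %d fixed (%d bit)\n",
			cap.version, cap.num_counters_gp, cap.bit_width_gp,
			cap.num_counters_fixed, cap.bit_width_fixed);
	}
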
index b9698d4..8944062 100644
@@ -45,6 +45,7 @@ struct event_constraint {
        u64     code;
        u64     cmask;
        int     weight;
+       int     overlap;
 };
 
 struct amd_nb {
@@ -151,15 +152,40 @@ struct cpu_hw_events {
        void                            *kfree_on_online;
 };
 
-#define __EVENT_CONSTRAINT(c, n, m, w) {\
+#define __EVENT_CONSTRAINT(c, n, m, w, o) {\
        { .idxmsk64 = (n) },            \
        .code = (c),                    \
        .cmask = (m),                   \
        .weight = (w),                  \
+       .overlap = (o),                 \
 }
 
 #define EVENT_CONSTRAINT(c, n, m)      \
-       __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))
+       __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0)
+
+/*
+ * The overlap flag marks event constraints with overlapping counter
+ * masks. This is the case if the counter mask of such an event is not
+ * a subset of any other counter mask of a constraint with an equal or
+ * higher weight, e.g.:
+ *
+ *  c_overlaps = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
+ *  c_another1 = EVENT_CONSTRAINT(0, 0x07, 0);
+ *  c_another2 = EVENT_CONSTRAINT(0, 0x38, 0);
+ *
+ * The event scheduler may not select the correct counter in the first
+ * cycle because it needs to know which subsequent events will be
+ * scheduled. It may then fail to schedule the events. So we set the
+ * overlap flag for such constraints to give the scheduler a hint which
+ * events to select for counter rescheduling.
+ *
+ * Care must be taken as the rescheduling algorithm is O(n!), which
+ * will dramatically increase scheduling cycles on an over-committed
+ * system. The number of such EVENT_CONSTRAINT_OVERLAP() macros and
+ * their counter masks must be kept to a minimum.
+ */
+#define EVENT_CONSTRAINT_OVERLAP(c, n, m)      \
+       __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1)
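
A concrete failure the flag repairs, using the masks from the comment above
plus two more 0x07-constrained events (hypothetical schedule, traced against
the perf_sched_* code earlier in this merge):

	/*
	 * events: A(0x09, w=2)  B(0x07, w=3)  C(0x07, w=3)  D(0x07, w=3)
	 *
	 * greedy, lowest weight first:
	 *   A -> 0 (state saved: A's constraint has the overlap flag)
	 *   B -> 1, C -> 2, D -> none of {0,1,2} free: dead end
	 * backtrack via perf_sched_restore_state():
	 *   A -> 3, then B -> 0, C -> 1, D -> 2: success
	 */
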
 
 /*
  * Constraint on the Event code.
@@ -235,6 +261,11 @@ union perf_capabilities {
        u64     capabilities;
 };
 
+struct x86_pmu_quirk {
+       struct x86_pmu_quirk *next;
+       void (*func)(void);
+};
+
 /*
  * struct x86_pmu - generic x86 pmu
  */
@@ -259,6 +290,11 @@ struct x86_pmu {
        int             num_counters_fixed;
        int             cntval_bits;
        u64             cntval_mask;
+       union {
+                       unsigned long events_maskl;
+                       unsigned long events_mask[BITS_TO_LONGS(ARCH_PERFMON_EVENTS_COUNT)];
+       };
+       int             events_mask_len;
        int             apic;
        u64             max_period;
        struct event_constraint *
@@ -268,7 +304,7 @@ struct x86_pmu {
        void            (*put_event_constraints)(struct cpu_hw_events *cpuc,
                                                 struct perf_event *event);
        struct event_constraint *event_constraints;
-       void            (*quirks)(void);
+       struct x86_pmu_quirk *quirks;
        int             perfctr_second_write;
 
        int             (*cpu_prepare)(int cpu);
@@ -309,6 +345,15 @@ struct x86_pmu {
        struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr);
 };
 
+#define x86_add_quirk(func_)                                           \
+do {                                                                   \
+       static struct x86_pmu_quirk __quirk __initdata = {              \
+               .func = func_,                                          \
+       };                                                              \
+       __quirk.next = x86_pmu.quirks;                                  \
+       x86_pmu.quirks = &__quirk;                                      \
+} while (0)
+
 #define ERF_NO_HT_SHARING      1
 #define ERF_HAS_RSP_1          2
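
Call sites now register any number of quirks instead of assigning a single
function pointer. A usage sketch of the x86_add_quirk() helper above;
my_pmu_quirk is hypothetical (the Intel PMU init code converted in this series
registers its quirks the same way):

	/* Hypothetical quirk; must run from __init context, since
	 * x86_add_quirk() instantiates a static __initdata list node. */
	static void __init my_pmu_quirk(void)
	{
		pr_warn("applying PMU erratum workaround\n");
	}

	/* from a vendor __init path; init_hw_perf_events() then runs the list */
	x86_add_quirk(my_pmu_quirk);
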