Merge branch 'linux-next' of git://git.kernel.org/pub/scm/linux/kernel/git/jbarnes...
Linus Torvalds [Mon, 20 Oct 2008 20:40:47 +0000 (13:40 -0700)]
* 'linux-next' of git://git.kernel.org/pub/scm/linux/kernel/git/jbarnes/pci-2.6: (41 commits)
  PCI: fix pci_ioremap_bar() on s390
  PCI: fix AER capability check
  PCI: use pci_find_ext_capability everywhere
  PCI: remove #ifdef DEBUG around dev_dbg call
  PCI hotplug: fix get_##name return value problem
  PCI: document the pcie_aspm kernel parameter
  PCI: introduce an pci_ioremap(pdev, barnr) function
  powerpc/PCI: Add legacy PCI access via sysfs
  PCI: Add ability to mmap legacy_io on some platforms
  PCI: probing debug message uniformization
  PCI: support PCIe ARI capability
  PCI: centralize the capabilities code in probe.c
  PCI: centralize the capabilities code in pci-sysfs.c
  PCI: fix 64-vbit prefetchable memory resource BARs
  PCI: replace cfg space size (256/4096) by macros.
  PCI: use resource_size() everywhere.
  PCI: use same arg names in PCI_VDEVICE comment
  PCI hotplug: rpaphp: make debug var unique
  PCI: use %pF instead of print_fn_descriptor_symbol() in quirks.c
  PCI: fix hotplug get_##name return value problem
  ...

283 files changed:
CREDITS
Documentation/markers.txt
Documentation/sysrq.txt
Documentation/tracepoints.txt [new file with mode: 0644]
Documentation/tracers/mmiotrace.txt
MAINTAINERS
arch/alpha/kernel/sys_sable.c
arch/arm/mach-ixp2000/ixdp2x00.c
arch/arm/mach-omap2/irq.c
arch/arm/mach-sa1100/include/mach/ide.h [deleted file]
arch/avr32/mach-at32ap/extint.c
arch/m32r/kernel/smpboot.c
arch/powerpc/include/asm/page.h
arch/powerpc/platforms/cell/spufs/sputrace.c
arch/x86/Kconfig
arch/x86/configs/i386_defconfig
arch/x86/kernel/Makefile
arch/x86/kernel/acpi/boot.c
arch/x86/kernel/acpi/sleep.c
arch/x86/kernel/apic.c [moved from arch/x86/kernel/apic_32.c with 79% similarity]
arch/x86/kernel/apic_64.c [deleted file]
arch/x86/kernel/bios_uv.c
arch/x86/kernel/cpu/.gitignore [new file with mode: 0644]
arch/x86/kernel/cpu/amd.c
arch/x86/kernel/cpu/cpufreq/longhaul.c
arch/x86/kernel/cpu/cpufreq/powernow-k6.c
arch/x86/kernel/cpu/cpufreq/powernow-k7.c
arch/x86/kernel/cpu/cpufreq/powernow-k8.c
arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
arch/x86/kernel/cpu/intel.c
arch/x86/kernel/cpu/mcheck/k7.c
arch/x86/kernel/cpu/mcheck/mce_32.c
arch/x86/kernel/cpu/mcheck/non-fatal.c
arch/x86/kernel/cpu/perfctr-watchdog.c
arch/x86/kernel/efi.c
arch/x86/kernel/entry_32.S
arch/x86/kernel/entry_64.S
arch/x86/kernel/ftrace.c
arch/x86/kernel/genapic_flat_64.c
arch/x86/kernel/genx2apic_uv_x.c
arch/x86/kernel/hpet.c
arch/x86/kernel/io_apic.c [moved from arch/x86/kernel/io_apic_64.c with 68% similarity]
arch/x86/kernel/io_apic_32.c [deleted file]
arch/x86/kernel/irq.c [new file with mode: 0644]
arch/x86/kernel/irq_32.c
arch/x86/kernel/irq_64.c
arch/x86/kernel/irqinit_32.c
arch/x86/kernel/irqinit_64.c
arch/x86/kernel/quirks.c
arch/x86/kernel/setup.c
arch/x86/kernel/setup_percpu.c
arch/x86/kernel/smpboot.c
arch/x86/kernel/uv_irq.c [new file with mode: 0644]
arch/x86/kernel/uv_sysfs.c [new file with mode: 0644]
arch/x86/kernel/visws_quirks.c
arch/x86/kernel/vmiclock_32.c
arch/x86/lguest/boot.c
arch/x86/mach-generic/bigsmp.c
arch/x86/mach-generic/es7000.c
arch/x86/mach-generic/numaq.c
arch/x86/mach-generic/summit.c
arch/x86/mach-voyager/voyager_smp.c
arch/x86/mm/mmio-mod.c
arch/x86/mm/pf_in.c
arch/x86/mm/testmmiotrace.c
arch/x86/xen/irq.c
arch/x86/xen/spinlock.c
crypto/async_tx/async_tx.c
drivers/char/agp/ali-agp.c
drivers/char/agp/amd64-agp.c
drivers/char/agp/ati-agp.c
drivers/char/agp/backend.c
drivers/char/agp/intel-agp.c
drivers/char/agp/nvidia-agp.c
drivers/char/agp/via-agp.c
drivers/char/hpet.c
drivers/char/random.c
drivers/char/sysrq.c
drivers/char/vr41xx_giu.c
drivers/clocksource/acpi_pm.c
drivers/dma/Kconfig
drivers/dma/dmatest.c
drivers/dma/fsldma.c
drivers/dma/fsldma.h
drivers/dma/ioat_dma.c
drivers/gpio/gpiolib.c
drivers/ide/Kconfig
drivers/ide/Makefile
drivers/ide/ide-atapi.c
drivers/ide/ide-cd.c
drivers/ide/ide-cd_ioctl.c
drivers/ide/ide-disk.c
drivers/ide/ide-disk.h
drivers/ide/ide-disk_ioctl.c
drivers/ide/ide-disk_proc.c
drivers/ide/ide-dma-sff.c
drivers/ide/ide-floppy.c
drivers/ide/ide-floppy.h
drivers/ide/ide-floppy_ioctl.c
drivers/ide/ide-floppy_proc.c
drivers/ide/ide-gd.c [new file with mode: 0644]
drivers/ide/ide-gd.h [new file with mode: 0644]
drivers/ide/ide-iops.c
drivers/ide/ide-probe.c
drivers/ide/ide-proc.c
drivers/ide/ide-tape.c
drivers/ide/pci/Makefile
drivers/ide/pci/delkin_cb.c
drivers/ide/pci/hpt34x.c [deleted file]
drivers/ide/pci/hpt366.c
drivers/ide/pci/scc_pata.c
drivers/ide/pci/sgiioc4.c
drivers/leds/Kconfig
drivers/mfd/asic3.c
drivers/mfd/htc-egpio.c
drivers/mmc/card/queue.c
drivers/mmc/host/s3cmci.c
drivers/mmc/host/s3cmci.h
drivers/net/3c59x.c
drivers/net/hamradio/baycom_ser_fdx.c
drivers/net/hamradio/scc.c
drivers/net/wan/sbni.c
drivers/parisc/dino.c
drivers/parisc/eisa.c
drivers/parisc/gsc.c
drivers/parisc/iosapic.c
drivers/parisc/superio.c
drivers/pci/dmar.c
drivers/pci/htirq.c
drivers/pci/intr_remapping.c
drivers/pcmcia/at91_cf.c
drivers/pcmcia/hd64465_ss.c
drivers/pcmcia/vrc4171_card.c
drivers/rtc/rtc-vr41xx.c
drivers/scsi/aha152x.c
drivers/scsi/ide-scsi.c
drivers/serial/68328serial.c
drivers/serial/8250.c
drivers/serial/amba-pl010.c
drivers/serial/amba-pl011.c
drivers/serial/cpm_uart/cpm_uart_core.c
drivers/serial/m32r_sio.c
drivers/serial/serial_core.c
drivers/serial/serial_lh7a40x.c
drivers/serial/sh-sci.c
drivers/serial/ucc_uart.c
drivers/video/imacfb.c [deleted file]
drivers/watchdog/ib700wdt.c
drivers/xen/events.c
fs/Kconfig
fs/binfmt_elf.c
fs/ext2/Kconfig [new file with mode: 0644]
fs/ext3/Kconfig [new file with mode: 0644]
fs/ext4/Kconfig [new file with mode: 0644]
fs/fuse/file.c
fs/fuse/fuse_i.h
fs/fuse/inode.c
fs/jbd/Kconfig [new file with mode: 0644]
fs/jbd2/Kconfig [new file with mode: 0644]
fs/proc/array.c
fs/proc/proc_misc.c
include/asm-frv/ide.h
include/asm-generic/vmlinux.lds.h
include/asm-m68k/ide.h
include/asm-parisc/ide.h
include/asm-x86/apic.h
include/asm-x86/bigsmp/apic.h
include/asm-x86/efi.h
include/asm-x86/es7000/apic.h
include/asm-x86/ftrace.h
include/asm-x86/genapic_32.h
include/asm-x86/hpet.h
include/asm-x86/hw_irq.h
include/asm-x86/io_apic.h
include/asm-x86/irq_vectors.h
include/asm-x86/mach-default/entry_arch.h
include/asm-x86/mach-default/mach_apic.h
include/asm-x86/mach-generic/irq_vectors_limits.h [deleted file]
include/asm-x86/mach-generic/mach_apic.h
include/asm-x86/numaq/apic.h
include/asm-x86/summit/apic.h
include/asm-x86/summit/irq_vectors_limits.h [deleted file]
include/asm-x86/uv/bios.h
include/asm-x86/uv/uv_irq.h [new file with mode: 0644]
include/linux/clocksource.h
include/linux/compiler.h
include/linux/dmar.h
include/linux/efi.h
include/linux/ftrace.h
include/linux/fuse.h
include/linux/hrtimer.h
include/linux/ide.h
include/linux/init.h
include/linux/interrupt.h
include/linux/irq.h
include/linux/irqnr.h [new file with mode: 0644]
include/linux/kernel.h
include/linux/kernel_stat.h
include/linux/kprobes.h
include/linux/linkage.h
include/linux/marker.h
include/linux/mmiotrace.h
include/linux/module.h
include/linux/posix-timers.h
include/linux/ring_buffer.h [new file with mode: 0644]
include/linux/sched.h
include/linux/tick.h
include/linux/time.h
include/linux/timex.h
include/linux/tracepoint.h [new file with mode: 0644]
include/trace/sched.h [new file with mode: 0644]
init/Kconfig
init/main.c
kernel/Makefile
kernel/compat.c
kernel/exit.c
kernel/fork.c
kernel/hrtimer.c
kernel/irq/autoprobe.c
kernel/irq/chip.c
kernel/irq/handle.c
kernel/irq/internals.h
kernel/irq/manage.c
kernel/irq/migration.c
kernel/irq/proc.c
kernel/irq/resend.c
kernel/irq/spurious.c
kernel/itimer.c
kernel/kthread.c
kernel/marker.c
kernel/module.c
kernel/notifier.c
kernel/posix-cpu-timers.c
kernel/posix-timers.c
kernel/rcutorture.c
kernel/sched.c
kernel/sched_fair.c
kernel/sched_rt.c
kernel/sched_stats.h
kernel/signal.c
kernel/softirq.c
kernel/sys.c
kernel/time/clocksource.c
kernel/time/jiffies.c
kernel/time/ntp.c
kernel/time/tick-broadcast.c
kernel/time/tick-internal.h
kernel/time/tick-sched.c
kernel/time/timekeeping.c
kernel/time/timer_list.c
kernel/timer.c
kernel/trace/Kconfig
kernel/trace/Makefile
kernel/trace/ftrace.c
kernel/trace/ring_buffer.c [new file with mode: 0644]
kernel/trace/trace.c
kernel/trace/trace.h
kernel/trace/trace_boot.c [new file with mode: 0644]
kernel/trace/trace_functions.c
kernel/trace/trace_irqsoff.c
kernel/trace/trace_mmiotrace.c
kernel/trace/trace_nop.c [new file with mode: 0644]
kernel/trace/trace_sched_switch.c
kernel/trace/trace_sched_wakeup.c
kernel/trace/trace_selftest.c
kernel/trace/trace_stack.c [new file with mode: 0644]
kernel/trace/trace_sysprof.c
kernel/tracepoint.c [new file with mode: 0644]
mm/vmalloc.c
samples/Kconfig
samples/Makefile
samples/markers/probe-example.c
samples/tracepoints/Makefile [new file with mode: 0644]
samples/tracepoints/tp-samples-trace.h [new file with mode: 0644]
samples/tracepoints/tracepoint-probe-sample.c [new file with mode: 0644]
samples/tracepoints/tracepoint-probe-sample2.c [new file with mode: 0644]
samples/tracepoints/tracepoint-sample.c [new file with mode: 0644]
scripts/Makefile.build
scripts/basic/.gitignore
scripts/bootgraph.pl
scripts/checkpatch.pl
scripts/recordmcount.pl [new file with mode: 0755]
security/selinux/hooks.c

diff --git a/CREDITS b/CREDITS
index c62dcb3..2358846 100644 (file)
--- a/CREDITS
+++ b/CREDITS
@@ -1653,14 +1653,14 @@ S: Chapel Hill, North Carolina 27514-4818
 S: USA
 
 N: Dave Jones
-E: davej@codemonkey.org.uk
+E: davej@redhat.com
 W: http://www.codemonkey.org.uk
-D: x86 errata/setup maintenance.
-D: AGPGART driver.
+D: Assorted VIA x86 support.
+D: 2.5 AGPGART overhaul.
 D: CPUFREQ maintenance.
-D: Backport/Forwardport merge monkey.
-D: Various Janitor work.
-S: United Kingdom
+D: Fedora kernel maintainence.
+D: Misc/Other.
+S: 314 Littleton Rd, Westford, MA 01886, USA
 
 N: Martin Josfsson
 E: gandalf@wlug.westbo.se
index d9f50a1..089f613 100644 (file)
@@ -50,10 +50,12 @@ Connecting a function (probe) to a marker is done by providing a probe (function
 to call) for the specific marker through marker_probe_register() and can be
 activated by calling marker_arm(). Marker deactivation can be done by calling
 marker_disarm() as many times as marker_arm() has been called. Removing a probe
-is done through marker_probe_unregister(); it will disarm the probe and make
-sure there is no caller left using the probe when it returns. Probe removal is
-preempt-safe because preemption is disabled around the probe call. See the
-"Probe example" section below for a sample probe module.
+is done through marker_probe_unregister(); it will disarm the probe.
+marker_synchronize_unregister() must be called before the end of the module exit
+function to make sure there is no caller left using the probe. This, and the
+fact that preemption is disabled around the probe call, make sure that probe
+removal and module unload are safe. See the "Probe example" section below for a
+sample probe module.
 
 The marker mechanism supports inserting multiple instances of the same marker.
 Markers can be put in inline functions, inlined static functions, and
index 49378a9..10a0263 100644 (file)
@@ -95,8 +95,9 @@ On all -  write a character to /proc/sysrq-trigger.  e.g.:
 
 'p'     - Will dump the current registers and flags to your console.
 
-'q'     - Will dump a list of all running hrtimers.
-         WARNING: Does not cover any other timers
+'q'     - Will dump per CPU lists of all armed hrtimers (but NOT regular
+          timer_list timers) and detailed information about all
+          clockevent devices.
 
 'r'     - Turns off keyboard raw mode and sets it to XLATE.
 
diff --git a/Documentation/tracepoints.txt b/Documentation/tracepoints.txt
new file mode 100644 (file)
index 0000000..5d354e1
--- /dev/null
@@ -0,0 +1,101 @@
+                    Using the Linux Kernel Tracepoints
+
+                           Mathieu Desnoyers
+
+
+This document introduces Linux Kernel Tracepoints and their use. It provides
+examples of how to insert tracepoints in the kernel and connect probe functions
+to them and provides some examples of probe functions.
+
+
+* Purpose of tracepoints
+
+A tracepoint placed in code provides a hook to call a function (probe) that you
+can provide at runtime. A tracepoint can be "on" (a probe is connected to it) or
+"off" (no probe is attached). When a tracepoint is "off" it has no effect,
+except for adding a tiny time penalty (checking a condition for a branch) and
+space penalty (adding a few bytes for the function call at the end of the
+instrumented function and adds a data structure in a separate section).  When a
+tracepoint is "on", the function you provide is called each time the tracepoint
+is executed, in the execution context of the caller. When the function provided
+ends its execution, it returns to the caller (continuing from the tracepoint
+site).
+
+You can put tracepoints at important locations in the code. They are
+lightweight hooks that can pass an arbitrary number of parameters,
+which prototypes are described in a tracepoint declaration placed in a header
+file.
+
+They can be used for tracing and performance accounting.
+
+
+* Usage
+
+Two elements are required for tracepoints :
+
+- A tracepoint definition, placed in a header file.
+- The tracepoint statement, in C code.
+
+In order to use tracepoints, you should include linux/tracepoint.h.
+
+In include/trace/subsys.h :
+
+#include <linux/tracepoint.h>
+
+DEFINE_TRACE(subsys_eventname,
+       TPPTOTO(int firstarg, struct task_struct *p),
+       TPARGS(firstarg, p));
+
+In subsys/file.c (where the tracing statement must be added) :
+
+#include <trace/subsys.h>
+
+void somefct(void)
+{
+       ...
+       trace_subsys_eventname(arg, task);
+       ...
+}
+
+Where :
+- subsys_eventname is an identifier unique to your event
+    - subsys is the name of your subsystem.
+    - eventname is the name of the event to trace.
+- TPPTOTO(int firstarg, struct task_struct *p) is the prototype of the function
+  called by this tracepoint.
+- TPARGS(firstarg, p) are the parameters names, same as found in the prototype.
+
+Connecting a function (probe) to a tracepoint is done by providing a probe
+(function to call) for the specific tracepoint through
+register_trace_subsys_eventname().  Removing a probe is done through
+unregister_trace_subsys_eventname(); it will remove the probe sure there is no
+caller left using the probe when it returns. Probe removal is preempt-safe
+because preemption is disabled around the probe call. See the "Probe example"
+section below for a sample probe module.
+
+The tracepoint mechanism supports inserting multiple instances of the same
+tracepoint, but a single definition must be made of a given tracepoint name over
+all the kernel to make sure no type conflict will occur. Name mangling of the
+tracepoints is done using the prototypes to make sure typing is correct.
+Verification of probe type correctness is done at the registration site by the
+compiler. Tracepoints can be put in inline functions, inlined static functions,
+and unrolled loops as well as regular functions.
+
+The naming scheme "subsys_event" is suggested here as a convention intended
+to limit collisions. Tracepoint names are global to the kernel: they are
+considered as being the same whether they are in the core kernel image or in
+modules.
+
+
+* Probe / tracepoint example
+
+See the example provided in samples/tracepoints/src
+
+Compile them with your kernel.
+
+Run, as root :
+modprobe tracepoint-example (insmod order is not important)
+modprobe tracepoint-probe-example
+cat /proc/tracepoint-example (returns an expected error)
+rmmod tracepoint-example tracepoint-probe-example
+dmesg
index a4afb56..5bbbe20 100644 (file)
@@ -36,7 +36,7 @@ $ mount -t debugfs debugfs /debug
 $ echo mmiotrace > /debug/tracing/current_tracer
 $ cat /debug/tracing/trace_pipe > mydump.txt &
 Start X or whatever.
-$ echo "X is up" > /debug/tracing/marker
+$ echo "X is up" > /debug/tracing/trace_marker
 $ echo none > /debug/tracing/current_tracer
 Check for lost events.
 
@@ -59,9 +59,8 @@ The 'cat' process should stay running (sleeping) in the background.
 Load the driver you want to trace and use it. Mmiotrace will only catch MMIO
 accesses to areas that are ioremapped while mmiotrace is active.
 
-[Unimplemented feature:]
 During tracing you can place comments (markers) into the trace by
-$ echo "X is up" > /debug/tracing/marker
+$ echo "X is up" > /debug/tracing/trace_marker
 This makes it easier to see which part of the (huge) trace corresponds to
 which action. It is recommended to place descriptive markers about what you
 do.
index 355c192..5c3f79c 100644 (file)
@@ -1198,7 +1198,7 @@ S:        Maintained
 
 CPU FREQUENCY DRIVERS
 P:     Dave Jones
-M:     davej@codemonkey.org.uk
+M:     davej@redhat.com
 L:     cpufreq@vger.kernel.org
 W:     http://www.codemonkey.org.uk/projects/cpufreq/
 T:     git kernel.org/pub/scm/linux/kernel/git/davej/cpufreq.git
index 99a7f19..a4555f4 100644 (file)
@@ -47,7 +47,7 @@ typedef struct irq_swizzle_struct
 
 static irq_swizzle_t *sable_lynx_irq_swizzle;
 
-static void sable_lynx_init_irq(int nr_irqs);
+static void sable_lynx_init_irq(int nr_of_irqs);
 
 #if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_SABLE)
 
@@ -530,11 +530,11 @@ sable_lynx_srm_device_interrupt(unsigned long vector)
 }
 
 static void __init
-sable_lynx_init_irq(int nr_irqs)
+sable_lynx_init_irq(int nr_of_irqs)
 {
        long i;
 
-       for (i = 0; i < nr_irqs; ++i) {
+       for (i = 0; i < nr_of_irqs; ++i) {
                irq_desc[i].status = IRQ_DISABLED | IRQ_LEVEL;
                irq_desc[i].chip = &sable_lynx_irq_type;
        }
index b0653a8..3045130 100644 (file)
@@ -143,7 +143,7 @@ static struct irq_chip ixdp2x00_cpld_irq_chip = {
        .unmask = ixdp2x00_irq_unmask
 };
 
-void __init ixdp2x00_init_irq(volatile unsigned long *stat_reg, volatile unsigned long *mask_reg, unsigned long nr_irqs)
+void __init ixdp2x00_init_irq(volatile unsigned long *stat_reg, volatile unsigned long *mask_reg, unsigned long nr_of_irqs)
 {
        unsigned int irq;
 
@@ -154,7 +154,7 @@ void __init ixdp2x00_init_irq(volatile unsigned long *stat_reg, volatile unsigne
 
        board_irq_stat = stat_reg;
        board_irq_mask = mask_reg;
-       board_irq_count = nr_irqs;
+       board_irq_count = nr_of_irqs;
 
        *board_irq_mask = 0xffffffff;
 
index d354e0f..c40fc37 100644 (file)
@@ -119,7 +119,7 @@ static void __init omap_irq_bank_init_one(struct omap_irq_bank *bank)
 
 void __init omap_init_irq(void)
 {
-       unsigned long nr_irqs = 0;
+       unsigned long nr_of_irqs = 0;
        unsigned int nr_banks = 0;
        int i;
 
@@ -133,14 +133,14 @@ void __init omap_init_irq(void)
 
                omap_irq_bank_init_one(bank);
 
-               nr_irqs += bank->nr_irqs;
+               nr_of_irqs += bank->nr_irqs;
                nr_banks++;
        }
 
        printk(KERN_INFO "Total of %ld interrupts on %d active controller%s\n",
-              nr_irqs, nr_banks, nr_banks > 1 ? "s" : "");
+              nr_of_irqs, nr_banks, nr_banks > 1 ? "s" : "");
 
-       for (i = 0; i < nr_irqs; i++) {
+       for (i = 0; i < nr_of_irqs; i++) {
                set_irq_chip(i, &omap_irq_chip);
                set_irq_handler(i, handle_level_irq);
                set_irq_flags(i, IRQF_VALID);
diff --git a/arch/arm/mach-sa1100/include/mach/ide.h b/arch/arm/mach-sa1100/include/mach/ide.h
deleted file mode 100644 (file)
index 4c99c8f..0000000
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * arch/arm/mach-sa1100/include/mach/ide.h
- *
- * Copyright (c) 1998 Hugo Fiennes & Nicolas Pitre
- *
- * 18-aug-2000: Cleanup by Erik Mouw (J.A.K.Mouw@its.tudelft.nl)
- *              Get rid of the special ide_init_hwif_ports() functions
- *              and make a generalised function that can be used by all
- *              architectures.
- */
-
-#include <asm/irq.h>
-#include <mach/hardware.h>
-#include <asm/mach-types.h>
-
-#error "This code is broken and needs update to match with current ide support"
-
-
-/*
- * Set up a hw structure for a specified data port, control port and IRQ.
- * This should follow whatever the default interface uses.
- */
-static inline void ide_init_hwif_ports(hw_regs_t *hw, unsigned long data_port,
-                                      unsigned long ctrl_port, int *irq)
-{
-       unsigned long reg = data_port;
-       int i;
-       int regincr = 1;
-
-       /* The Empeg board has the first two address lines unused */
-       if (machine_is_empeg())
-               regincr = 1 << 2;
-
-       /* The LART doesn't use A0 for IDE */
-       if (machine_is_lart())
-               regincr = 1 << 1;
-
-       memset(hw, 0, sizeof(*hw));
-
-       for (i = 0; i <= 7; i++) {
-               hw->io_ports_array[i] = reg;
-               reg += regincr;
-       }
-
-       hw->io_ports.ctl_addr = ctrl_port;
-
-       if (irq)
-               *irq = 0;
-}
-
-/*
- * This registers the standard ports for this architecture with the IDE
- * driver.
- */
-static __inline__ void
-ide_init_default_hwifs(void)
-{
-    if (machine_is_lart()) {
-#ifdef CONFIG_SA1100_LART
-        hw_regs_t hw;
-
-        /* Enable GPIO as interrupt line */
-        GPDR &= ~LART_GPIO_IDE;
-       set_irq_type(LART_IRQ_IDE, IRQ_TYPE_EDGE_RISING);
-
-        /* set PCMCIA interface timing */
-        MECR = 0x00060006;
-
-        /* init the interface */
-       ide_init_hwif_ports(&hw, PCMCIA_IO_0_BASE + 0x0000, PCMCIA_IO_0_BASE + 0x1000, NULL);
-        hw.irq = LART_IRQ_IDE;
-        ide_register_hw(&hw);
-#endif
-    }
-}
index c36a6d5..310477b 100644 (file)
@@ -191,7 +191,7 @@ static int __init eic_probe(struct platform_device *pdev)
        struct eic *eic;
        struct resource *regs;
        unsigned int i;
-       unsigned int nr_irqs;
+       unsigned int nr_of_irqs;
        unsigned int int_irq;
        int ret;
        u32 pattern;
@@ -224,7 +224,7 @@ static int __init eic_probe(struct platform_device *pdev)
        eic_writel(eic, IDR, ~0UL);
        eic_writel(eic, MODE, ~0UL);
        pattern = eic_readl(eic, MODE);
-       nr_irqs = fls(pattern);
+       nr_of_irqs = fls(pattern);
 
        /* Trigger on low level unless overridden by driver */
        eic_writel(eic, EDGE, 0UL);
@@ -232,7 +232,7 @@ static int __init eic_probe(struct platform_device *pdev)
 
        eic->chip = &eic_chip;
 
-       for (i = 0; i < nr_irqs; i++) {
+       for (i = 0; i < nr_of_irqs; i++) {
                set_irq_chip_and_handler(eic->first_irq + i, &eic_chip,
                                         handle_level_irq);
                set_irq_chip_data(eic->first_irq + i, eic);
@@ -256,7 +256,7 @@ static int __init eic_probe(struct platform_device *pdev)
                 eic->regs, int_irq);
        dev_info(&pdev->dev,
                 "Handling %u external IRQs, starting with IRQ %u\n",
-                nr_irqs, eic->first_irq);
+                nr_of_irqs, eic->first_irq);
 
        return 0;
 
index fc29948..39cb6da 100644 (file)
@@ -40,6 +40,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/cpu.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
index 64e1445..5ac51e6 100644 (file)
  * 2 of the License, or (at your option) any later version.
  */
 
+#ifndef __ASSEMBLY__
+#include <linux/types.h>
+#else
+#include <asm/types.h>
+#endif
 #include <asm/asm-compat.h>
 #include <asm/kdump.h>
-#include <asm/types.h>
 
 /*
  * On PPC32 page size is 4K. For PPC64 we support either 4K or 64K software
index 92d20e9..2ece399 100644 (file)
@@ -232,6 +232,7 @@ static void __exit sputrace_exit(void)
 
        remove_proc_entry("sputrace", NULL);
        kfree(sputrace_log);
+       marker_synchronize_unregister();
 }
 
 module_init(sputrace_init);
index 49349ba..5b9b123 100644 (file)
@@ -26,6 +26,7 @@ config X86
        select HAVE_KPROBES
        select ARCH_WANT_OPTIONAL_GPIOLIB
        select HAVE_KRETPROBES
+       select HAVE_FTRACE_MCOUNT_RECORD
        select HAVE_DYNAMIC_FTRACE
        select HAVE_FTRACE
        select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
@@ -1242,14 +1243,6 @@ config EFI
        resultant kernel should continue to boot on existing non-EFI
        platforms.
 
-config IRQBALANCE
-       def_bool y
-       prompt "Enable kernel irq balancing"
-       depends on X86_32 && SMP && X86_IO_APIC
-       help
-         The default yes will allow the kernel to do irq load balancing.
-         Saying no will keep the kernel from doing irq load balancing.
-
 config SECCOMP
        def_bool y
        prompt "Enable seccomp to safely compute untrusted bytecode"
index 52d0359..13b8c86 100644 (file)
@@ -287,7 +287,6 @@ CONFIG_MTRR=y
 # CONFIG_MTRR_SANITIZER is not set
 CONFIG_X86_PAT=y
 CONFIG_EFI=y
-# CONFIG_IRQBALANCE is not set
 CONFIG_SECCOMP=y
 # CONFIG_HZ_100 is not set
 # CONFIG_HZ_250 is not set
index 0d41f03..d7e5a58 100644 (file)
@@ -23,7 +23,7 @@ CFLAGS_hpet.o         := $(nostackp)
 CFLAGS_tsc.o           := $(nostackp)
 
 obj-y                  := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o
-obj-y                  += traps.o irq_$(BITS).o dumpstack_$(BITS).o
+obj-y                  += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
 obj-y                  += time_$(BITS).o ioport.o ldt.o
 obj-y                  += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o
 obj-$(CONFIG_X86_VISWS)        += visws_quirks.o
@@ -60,8 +60,8 @@ obj-$(CONFIG_X86_32_SMP)      += smpcommon.o
 obj-$(CONFIG_X86_64_SMP)       += tsc_sync.o smpcommon.o
 obj-$(CONFIG_X86_TRAMPOLINE)   += trampoline_$(BITS).o
 obj-$(CONFIG_X86_MPPARSE)      += mpparse.o
-obj-$(CONFIG_X86_LOCAL_APIC)   += apic_$(BITS).o nmi.o
-obj-$(CONFIG_X86_IO_APIC)      += io_apic_$(BITS).o
+obj-$(CONFIG_X86_LOCAL_APIC)   += apic.o nmi.o
+obj-$(CONFIG_X86_IO_APIC)      += io_apic.o
 obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
 obj-$(CONFIG_DYNAMIC_FTRACE)   += ftrace.o
 obj-$(CONFIG_KEXEC)            += machine_kexec_$(BITS).o
@@ -108,7 +108,7 @@ obj-$(CONFIG_MICROCODE)                     += microcode.o
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
         obj-y                          += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o
-       obj-y                           += bios_uv.o
+       obj-y                           += bios_uv.o uv_irq.o uv_sysfs.o
         obj-y                          += genx2apic_cluster.o
         obj-y                          += genx2apic_phys.o
         obj-$(CONFIG_X86_PM_TIMER)     += pmtimer_64.o
index eb875cd..0d1c26a 100644 (file)
@@ -1256,7 +1256,7 @@ static int __init acpi_parse_madt_ioapic_entries(void)
 
        count =
            acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, acpi_parse_int_src_ovr,
-                                 NR_IRQ_VECTORS);
+                                 nr_irqs);
        if (count < 0) {
                printk(KERN_ERR PREFIX
                       "Error parsing interrupt source overrides entry\n");
@@ -1276,7 +1276,7 @@ static int __init acpi_parse_madt_ioapic_entries(void)
 
        count =
            acpi_table_parse_madt(ACPI_MADT_TYPE_NMI_SOURCE, acpi_parse_nmi_src,
-                                 NR_IRQ_VECTORS);
+                                 nr_irqs);
        if (count < 0) {
                printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
                /* TBD: Cleanup to allow fallback to MPS */
index 426e5d9..c44cd6d 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/dmi.h>
 #include <linux/cpumask.h>
 #include <asm/segment.h>
+#include <asm/desc.h>
 
 #include "realmode/wakeup.h"
 #include "sleep.h"
@@ -98,6 +99,8 @@ int acpi_save_state_mem(void)
        header->trampoline_segment = setup_trampoline() >> 4;
 #ifdef CONFIG_SMP
        stack_start.sp = temp_stack + 4096;
+       early_gdt_descr.address =
+                       (unsigned long)get_cpu_gdt_table(smp_processor_id());
 #endif
        initial_code = (unsigned long)wakeup_long64;
        saved_magic = 0x123456789abcdef0;
similarity index 79%
rename from arch/x86/kernel/apic_32.c
rename to arch/x86/kernel/apic.c
index 21c831d..04a7f96 100644 (file)
 #include <linux/mc146818rtc.h>
 #include <linux/kernel_stat.h>
 #include <linux/sysdev.h>
+#include <linux/ioport.h>
 #include <linux/cpu.h>
 #include <linux/clockchips.h>
 #include <linux/acpi_pmtmr.h>
 #include <linux/module.h>
 #include <linux/dmi.h>
+#include <linux/dmar.h>
 
 #include <asm/atomic.h>
 #include <asm/smp.h>
 #include <asm/desc.h>
 #include <asm/arch_hooks.h>
 #include <asm/hpet.h>
+#include <asm/pgalloc.h>
 #include <asm/i8253.h>
 #include <asm/nmi.h>
+#include <asm/idle.h>
+#include <asm/proto.h>
+#include <asm/timex.h>
+#include <asm/apic.h>
+#include <asm/i8259.h>
 
 #include <mach_apic.h>
 #include <mach_apicdef.h>
 # error SPURIOUS_APIC_VECTOR definition error
 #endif
 
-unsigned long mp_lapic_addr;
-
+#ifdef CONFIG_X86_32
 /*
  * Knob to control our willingness to enable the local APIC.
  *
  * +1=force-enable
  */
 static int force_enable_local_apic;
-int disable_apic;
+/*
+ * APIC command line parameters
+ */
+static int __init parse_lapic(char *arg)
+{
+       force_enable_local_apic = 1;
+       return 0;
+}
+early_param("lapic", parse_lapic);
+/* Local APIC was disabled by the BIOS and enabled by the kernel */
+static int enabled_via_apicbase;
+
+#endif
+
+#ifdef CONFIG_X86_64
+static int apic_calibrate_pmtmr __initdata;
+static __init int setup_apicpmtimer(char *s)
+{
+       apic_calibrate_pmtmr = 1;
+       notsc_setup(NULL);
+       return 0;
+}
+__setup("apicpmtimer", setup_apicpmtimer);
+#endif
+
+#ifdef CONFIG_X86_64
+#define HAVE_X2APIC
+#endif
+
+#ifdef HAVE_X2APIC
+int x2apic;
+/* x2apic enabled before OS handover */
+int x2apic_preenabled;
+int disable_x2apic;
+static __init int setup_nox2apic(char *str)
+{
+       disable_x2apic = 1;
+       setup_clear_cpu_cap(X86_FEATURE_X2APIC);
+       return 0;
+}
+early_param("nox2apic", setup_nox2apic);
+#endif
 
+unsigned long mp_lapic_addr;
+int disable_apic;
 /* Disable local APIC timer from the kernel commandline or via dmi quirk */
 static int disable_apic_timer __cpuinitdata;
 /* Local APIC timer works in C2 */
@@ -110,9 +160,6 @@ static struct clock_event_device lapic_clockevent = {
 };
 static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
 
-/* Local APIC was disabled by the BIOS and enabled by the kernel */
-static int enabled_via_apicbase;
-
 static unsigned long apic_phys;
 
 /*
@@ -202,6 +249,42 @@ static struct apic_ops xapic_ops = {
 struct apic_ops __read_mostly *apic_ops = &xapic_ops;
 EXPORT_SYMBOL_GPL(apic_ops);
 
+#ifdef HAVE_X2APIC
+static void x2apic_wait_icr_idle(void)
+{
+       /* no need to wait for icr idle in x2apic */
+       return;
+}
+
+static u32 safe_x2apic_wait_icr_idle(void)
+{
+       /* no need to wait for icr idle in x2apic */
+       return 0;
+}
+
+void x2apic_icr_write(u32 low, u32 id)
+{
+       wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
+}
+
+u64 x2apic_icr_read(void)
+{
+       unsigned long val;
+
+       rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val);
+       return val;
+}
+
+static struct apic_ops x2apic_ops = {
+       .read = native_apic_msr_read,
+       .write = native_apic_msr_write,
+       .icr_read = x2apic_icr_read,
+       .icr_write = x2apic_icr_write,
+       .wait_icr_idle = x2apic_wait_icr_idle,
+       .safe_wait_icr_idle = safe_x2apic_wait_icr_idle,
+};
+#endif
+
 /**
  * enable_NMI_through_LVT0 - enable NMI through local vector table 0
  */
@@ -219,6 +302,7 @@ void __cpuinit enable_NMI_through_LVT0(void)
        apic_write(APIC_LVT0, v);
 }
 
+#ifdef CONFIG_X86_32
 /**
  * get_physical_broadcast - Get number of physical broadcast IDs
  */
@@ -226,6 +310,7 @@ int get_physical_broadcast(void)
 {
        return modern_apic() ? 0xff : 0xf;
 }
+#endif
 
 /**
  * lapic_get_maxlvt - get the maximum number of local vector table entries
@@ -247,11 +332,7 @@ int lapic_get_maxlvt(void)
  */
 
 /* Clock divisor */
-#ifdef CONFG_X86_64
-#define APIC_DIVISOR 1
-#else
 #define APIC_DIVISOR 16
-#endif
 
 /*
  * This function sets up the local APIC timer, with a timeout of
@@ -383,7 +464,7 @@ static void lapic_timer_broadcast(cpumask_t mask)
  * Setup the local APIC timer for this CPU. Copy the initilized values
  * of the boot CPU and register the clock event in the framework.
  */
-static void __devinit setup_APIC_timer(void)
+static void __cpuinit setup_APIC_timer(void)
 {
        struct clock_event_device *levt = &__get_cpu_var(lapic_events);
 
@@ -453,14 +534,51 @@ static void __init lapic_cal_handler(struct clock_event_device *dev)
        }
 }
 
+static int __init calibrate_by_pmtimer(long deltapm, long *delta)
+{
+       const long pm_100ms = PMTMR_TICKS_PER_SEC / 10;
+       const long pm_thresh = pm_100ms / 100;
+       unsigned long mult;
+       u64 res;
+
+#ifndef CONFIG_X86_PM_TIMER
+       return -1;
+#endif
+
+       apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm);
+
+       /* Check, if the PM timer is available */
+       if (!deltapm)
+               return -1;
+
+       mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22);
+
+       if (deltapm > (pm_100ms - pm_thresh) &&
+           deltapm < (pm_100ms + pm_thresh)) {
+               apic_printk(APIC_VERBOSE, "... PM timer result ok\n");
+       } else {
+               res = (((u64)deltapm) *  mult) >> 22;
+               do_div(res, 1000000);
+               printk(KERN_WARNING "APIC calibration not consistent "
+                       "with PM Timer: %ldms instead of 100ms\n",
+                       (long)res);
+               /* Correct the lapic counter value */
+               res = (((u64)(*delta)) * pm_100ms);
+               do_div(res, deltapm);
+               printk(KERN_INFO "APIC delta adjusted to PM-Timer: "
+                       "%lu (%ld)\n", (unsigned long)res, *delta);
+               *delta = (long)res;
+       }
+
+       return 0;
+}
+
 static int __init calibrate_APIC_clock(void)
 {
        struct clock_event_device *levt = &__get_cpu_var(lapic_events);
-       const long pm_100ms = PMTMR_TICKS_PER_SEC/10;
-       const long pm_thresh = pm_100ms/100;
        void (*real_handler)(struct clock_event_device *dev);
        unsigned long deltaj;
-       long delta, deltapm;
+       long delta;
        int pm_referenced = 0;
 
        local_irq_disable();
@@ -470,10 +588,10 @@ static int __init calibrate_APIC_clock(void)
        global_clock_event->event_handler = lapic_cal_handler;
 
        /*
-        * Setup the APIC counter to 1e9. There is no way the lapic
+        * Setup the APIC counter to maximum. There is no way the lapic
         * can underflow in the 100ms detection time frame
         */
-       __setup_APIC_LVTT(1000000000, 0, 0);
+       __setup_APIC_LVTT(0xffffffff, 0, 0);
 
        /* Let the interrupts run */
        local_irq_enable();
@@ -490,34 +608,9 @@ static int __init calibrate_APIC_clock(void)
        delta = lapic_cal_t1 - lapic_cal_t2;
        apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta);
 
-       /* Check, if the PM timer is available */
-       deltapm = lapic_cal_pm2 - lapic_cal_pm1;
-       apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm);
-
-       if (deltapm) {
-               unsigned long mult;
-               u64 res;
-
-               mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22);
-
-               if (deltapm > (pm_100ms - pm_thresh) &&
-                   deltapm < (pm_100ms + pm_thresh)) {
-                       apic_printk(APIC_VERBOSE, "... PM timer result ok\n");
-               } else {
-                       res = (((u64) deltapm) *  mult) >> 22;
-                       do_div(res, 1000000);
-                       printk(KERN_WARNING "APIC calibration not consistent "
-                              "with PM Timer: %ldms instead of 100ms\n",
-                              (long)res);
-                       /* Correct the lapic counter value */
-                       res = (((u64) delta) * pm_100ms);
-                       do_div(res, deltapm);
-                       printk(KERN_INFO "APIC delta adjusted to PM-Timer: "
-                              "%lu (%ld)\n", (unsigned long) res, delta);
-                       delta = (long) res;
-               }
-               pm_referenced = 1;
-       }
+       /* we trust the PM based calibration if possible */
+       pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1,
+                                       &delta);
 
        /* Calculate the scaled math multiplication factor */
        lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS,
@@ -559,7 +652,10 @@ static int __init calibrate_APIC_clock(void)
 
        levt->features &= ~CLOCK_EVT_FEAT_DUMMY;
 
-       /* We trust the pm timer based calibration */
+       /*
+        * PM timer calibration failed or not turned on
+        * so lets try APIC timer based calibration
+        */
        if (!pm_referenced) {
                apic_printk(APIC_VERBOSE, "... verify APIC timer\n");
 
@@ -652,7 +748,7 @@ void __init setup_boot_APIC_clock(void)
        setup_APIC_timer();
 }
 
-void __devinit setup_secondary_APIC_clock(void)
+void __cpuinit setup_secondary_APIC_clock(void)
 {
        setup_APIC_timer();
 }
@@ -718,6 +814,9 @@ void smp_apic_timer_interrupt(struct pt_regs *regs)
         * Besides, if we don't timer interrupts ignore the global
         * interrupt lock, which is the WrongThing (tm) to do.
         */
+#ifdef CONFIG_X86_64
+       exit_idle();
+#endif
        irq_enter();
        local_apic_timer_interrupt();
        irq_exit();
@@ -991,40 +1090,43 @@ void __init init_bsp_APIC(void)
 
 static void __cpuinit lapic_setup_esr(void)
 {
-       unsigned long oldvalue, value, maxlvt;
-       if (lapic_is_integrated() && !esr_disable) {
-               if (esr_disable) {
-                       /*
-                        * Something untraceable is creating bad interrupts on
-                        * secondary quads ... for the moment, just leave the
-                        * ESR disabled - we can't do anything useful with the
-                        * errors anyway - mbligh
-                        */
-                       printk(KERN_INFO "Leaving ESR disabled.\n");
-                       return;
-               }
-               /* !82489DX */
-               maxlvt = lapic_get_maxlvt();
-               if (maxlvt > 3)         /* Due to the Pentium erratum 3AP. */
-                       apic_write(APIC_ESR, 0);
-               oldvalue = apic_read(APIC_ESR);
+       unsigned int oldvalue, value, maxlvt;
+
+       if (!lapic_is_integrated()) {
+               printk(KERN_INFO "No ESR for 82489DX.\n");
+               return;
+       }
 
-               /* enables sending errors */
-               value = ERROR_APIC_VECTOR;
-               apic_write(APIC_LVTERR, value);
+       if (esr_disable) {
                /*
-                * spec says clear errors after enabling vector.
+                * Something untraceable is creating bad interrupts on
+                * secondary quads ... for the moment, just leave the
+                * ESR disabled - we can't do anything useful with the
+                * errors anyway - mbligh
                 */
-               if (maxlvt > 3)
-                       apic_write(APIC_ESR, 0);
-               value = apic_read(APIC_ESR);
-               if (value != oldvalue)
-                       apic_printk(APIC_VERBOSE, "ESR value before enabling "
-                               "vector: 0x%08lx  after: 0x%08lx\n",
-                               oldvalue, value);
-       } else {
-               printk(KERN_INFO "No ESR for 82489DX.\n");
+               printk(KERN_INFO "Leaving ESR disabled.\n");
+               return;
        }
+
+       maxlvt = lapic_get_maxlvt();
+       if (maxlvt > 3)         /* Due to the Pentium erratum 3AP. */
+               apic_write(APIC_ESR, 0);
+       oldvalue = apic_read(APIC_ESR);
+
+       /* enables sending errors */
+       value = ERROR_APIC_VECTOR;
+       apic_write(APIC_LVTERR, value);
+
+       /*
+        * spec says clear errors after enabling vector.
+        */
+       if (maxlvt > 3)
+               apic_write(APIC_ESR, 0);
+       value = apic_read(APIC_ESR);
+       if (value != oldvalue)
+               apic_printk(APIC_VERBOSE, "ESR value before enabling "
+                       "vector: 0x%08x  after: 0x%08x\n",
+                       oldvalue, value);
 }
 
 
@@ -1033,24 +1135,27 @@ static void __cpuinit lapic_setup_esr(void)
  */
 void __cpuinit setup_local_APIC(void)
 {
-       unsigned long value, integrated;
+       unsigned int value;
        int i, j;
 
+#ifdef CONFIG_X86_32
        /* Pound the ESR really hard over the head with a big hammer - mbligh */
-       if (esr_disable) {
+       if (lapic_is_integrated() && esr_disable) {
                apic_write(APIC_ESR, 0);
                apic_write(APIC_ESR, 0);
                apic_write(APIC_ESR, 0);
                apic_write(APIC_ESR, 0);
        }
+#endif
 
-       integrated = lapic_is_integrated();
+       preempt_disable();
 
        /*
         * Double-check whether this APIC is really registered.
+        * This is meaningless in clustered apic mode, so we skip it.
         */
        if (!apic_id_registered())
-               WARN_ON_ONCE(1);
+               BUG();
 
        /*
         * Intel recommends to set DFR, LDR and TPR before enabling
@@ -1096,6 +1201,7 @@ void __cpuinit setup_local_APIC(void)
         */
        value |= APIC_SPIV_APIC_ENABLED;
 
+#ifdef CONFIG_X86_32
        /*
         * Some unknown Intel IO/APIC (or APIC) errata is biting us with
         * certain networking cards. If high frequency interrupts are
@@ -1116,8 +1222,13 @@ void __cpuinit setup_local_APIC(void)
         * See also the comment in end_level_ioapic_irq().  --macro
         */
 
-       /* Enable focus processor (bit==0) */
+       /*
+        * - enable focus processor (bit==0)
+        * - 64bit mode always use processor focus
+        *   so no need to set it
+        */
        value &= ~APIC_SPIV_FOCUS_DISABLED;
+#endif
 
        /*
         * Set spurious IRQ vector
@@ -1154,9 +1265,11 @@ void __cpuinit setup_local_APIC(void)
                value = APIC_DM_NMI;
        else
                value = APIC_DM_NMI | APIC_LVT_MASKED;
-       if (!integrated)                /* 82489DX */
+       if (!lapic_is_integrated())             /* 82489DX */
                value |= APIC_LVT_LEVEL_TRIGGER;
        apic_write(APIC_LVT1, value);
+
+       preempt_enable();
 }
 
 void __cpuinit end_local_APIC_setup(void)
@@ -1177,6 +1290,153 @@ void __cpuinit end_local_APIC_setup(void)
        apic_pm_activate();
 }
 
+#ifdef HAVE_X2APIC
+void check_x2apic(void)
+{
+       int msr, msr2;
+
+       rdmsr(MSR_IA32_APICBASE, msr, msr2);
+
+       if (msr & X2APIC_ENABLE) {
+               printk("x2apic enabled by BIOS, switching to x2apic ops\n");
+               x2apic_preenabled = x2apic = 1;
+               apic_ops = &x2apic_ops;
+       }
+}
+
+void enable_x2apic(void)
+{
+       int msr, msr2;
+
+       rdmsr(MSR_IA32_APICBASE, msr, msr2);
+       if (!(msr & X2APIC_ENABLE)) {
+               printk("Enabling x2apic\n");
+               wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
+       }
+}
+
+void enable_IR_x2apic(void)
+{
+#ifdef CONFIG_INTR_REMAP
+       int ret;
+       unsigned long flags;
+
+       if (!cpu_has_x2apic)
+               return;
+
+       if (!x2apic_preenabled && disable_x2apic) {
+               printk(KERN_INFO
+                      "Skipped enabling x2apic and Interrupt-remapping "
+                      "because of nox2apic\n");
+               return;
+       }
+
+       if (x2apic_preenabled && disable_x2apic)
+               panic("Bios already enabled x2apic, can't enforce nox2apic");
+
+       if (!x2apic_preenabled && skip_ioapic_setup) {
+               printk(KERN_INFO
+                      "Skipped enabling x2apic and Interrupt-remapping "
+                      "because of skipping io-apic setup\n");
+               return;
+       }
+
+       ret = dmar_table_init();
+       if (ret) {
+               printk(KERN_INFO
+                      "dmar_table_init() failed with %d:\n", ret);
+
+               if (x2apic_preenabled)
+                       panic("x2apic enabled by bios. But IR enabling failed");
+               else
+                       printk(KERN_INFO
+                              "Not enabling x2apic,Intr-remapping\n");
+               return;
+       }
+
+       local_irq_save(flags);
+       mask_8259A();
+
+       ret = save_mask_IO_APIC_setup();
+       if (ret) {
+               printk(KERN_INFO "Saving IO-APIC state failed: %d\n", ret);
+               goto end;
+       }
+
+       ret = enable_intr_remapping(1);
+
+       if (ret && x2apic_preenabled) {
+               local_irq_restore(flags);
+               panic("x2apic enabled by bios. But IR enabling failed");
+       }
+
+       if (ret)
+               goto end_restore;
+
+       if (!x2apic) {
+               x2apic = 1;
+               apic_ops = &x2apic_ops;
+               enable_x2apic();
+       }
+
+end_restore:
+       if (ret)
+               /*
+                * IR enabling failed
+                */
+               restore_IO_APIC_setup();
+       else
+               reinit_intr_remapped_IO_APIC(x2apic_preenabled);
+
+end:
+       unmask_8259A();
+       local_irq_restore(flags);
+
+       if (!ret) {
+               if (!x2apic_preenabled)
+                       printk(KERN_INFO
+                              "Enabled x2apic and interrupt-remapping\n");
+               else
+                       printk(KERN_INFO
+                              "Enabled Interrupt-remapping\n");
+       } else
+               printk(KERN_ERR
+                      "Failed to enable Interrupt-remapping and x2apic\n");
+#else
+       if (!cpu_has_x2apic)
+               return;
+
+       if (x2apic_preenabled)
+               panic("x2apic enabled prior OS handover,"
+                     " enable CONFIG_INTR_REMAP");
+
+       printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping "
+              " and x2apic\n");
+#endif
+
+       return;
+}
+#endif /* HAVE_X2APIC */
+
+#ifdef CONFIG_X86_64
+/*
+ * Detect and enable local APICs on non-SMP boards.
+ * Original code written by Keir Fraser.
+ * On AMD64 we trust the BIOS - if it says no APIC it is likely
+ * not correctly set up (usually the APIC timer won't work etc.)
+ */
+static int __init detect_init_APIC(void)
+{
+       if (!cpu_has_apic) {
+               printk(KERN_INFO "No local APIC present\n");
+               return -1;
+       }
+
+       mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+       boot_cpu_physical_apicid = 0;
+       return 0;
+}
+#else
 /*
  * Detect and initialize APIC
  */
@@ -1255,12 +1515,46 @@ no_apic:
        printk(KERN_INFO "No local APIC present or hardware disabled\n");
        return -1;
 }
+#endif
+
+#ifdef CONFIG_X86_64
+void __init early_init_lapic_mapping(void)
+{
+       unsigned long phys_addr;
+
+       /*
+        * If no local APIC can be found then go out
+        * : it means there is no mpatable and MADT
+        */
+       if (!smp_found_config)
+               return;
+
+       phys_addr = mp_lapic_addr;
+
+       set_fixmap_nocache(FIX_APIC_BASE, phys_addr);
+       apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
+                   APIC_BASE, phys_addr);
+
+       /*
+        * Fetch the APIC ID of the BSP in case we have a
+        * default configuration (or the MP table is broken).
+        */
+       boot_cpu_physical_apicid = read_apic_id();
+}
+#endif
 
 /**
  * init_apic_mappings - initialize APIC mappings
  */
 void __init init_apic_mappings(void)
 {
+#ifdef HAVE_X2APIC
+       if (x2apic) {
+               boot_cpu_physical_apicid = read_apic_id();
+               return;
+       }
+#endif
+
        /*
         * If no local APIC can be found then set up a fake all
         * zeroes page to simulate the local APIC and another
@@ -1273,8 +1567,8 @@ void __init init_apic_mappings(void)
                apic_phys = mp_lapic_addr;
 
        set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
-       printk(KERN_DEBUG "mapped APIC to %08lx (%08lx)\n", APIC_BASE,
-              apic_phys);
+       apic_printk(APIC_VERBOSE, "mapped APIC to %08lx (%08lx)\n",
+                               APIC_BASE, apic_phys);
 
        /*
         * Fetch the APIC ID of the BSP in case we have a
@@ -1282,18 +1576,27 @@ void __init init_apic_mappings(void)
         */
        if (boot_cpu_physical_apicid == -1U)
                boot_cpu_physical_apicid = read_apic_id();
-
 }
 
 /*
  * This initializes the IO-APIC and APIC hardware if this is
  * a UP kernel.
  */
-
 int apic_version[MAX_APICS];
 
 int __init APIC_init_uniprocessor(void)
 {
+#ifdef CONFIG_X86_64
+       if (disable_apic) {
+               printk(KERN_INFO "Apic disabled\n");
+               return -1;
+       }
+       if (!cpu_has_apic) {
+               disable_apic = 1;
+               printk(KERN_INFO "Apic disabled by BIOS\n");
+               return -1;
+       }
+#else
        if (!smp_found_config && !cpu_has_apic)
                return -1;
 
@@ -1302,39 +1605,68 @@ int __init APIC_init_uniprocessor(void)
         */
        if (!cpu_has_apic &&
            APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
-               printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
+               printk(KERN_ERR "BIOS bug, local APIC 0x%x not detected!...\n",
                       boot_cpu_physical_apicid);
                clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
                return -1;
        }
+#endif
 
-       verify_local_APIC();
+#ifdef HAVE_X2APIC
+       enable_IR_x2apic();
+#endif
+#ifdef CONFIG_X86_64
+       setup_apic_routing();
+#endif
 
+       verify_local_APIC();
        connect_bsp_APIC();
 
+#ifdef CONFIG_X86_64
+       apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid));
+#else
        /*
         * Hack: In case of kdump, after a crash, kernel might be booting
         * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid
         * might be zero if read from MP tables. Get it from LAPIC.
         */
-#ifdef CONFIG_CRASH_DUMP
+# ifdef CONFIG_CRASH_DUMP
        boot_cpu_physical_apicid = read_apic_id();
+# endif
 #endif
        physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
-
        setup_local_APIC();
 
+#ifdef CONFIG_X86_64
+       /*
+        * Now enable IO-APICs, actually call clear_IO_APIC
+        * We need clear_IO_APIC before enabling vector on BP
+        */
+       if (!skip_ioapic_setup && nr_ioapics)
+               enable_IO_APIC();
+#endif
+
 #ifdef CONFIG_X86_IO_APIC
        if (!smp_found_config || skip_ioapic_setup || !nr_ioapics)
 #endif
                localise_nmi_watchdog();
        end_local_APIC_setup();
+
 #ifdef CONFIG_X86_IO_APIC
-       if (smp_found_config)
-               if (!skip_ioapic_setup && nr_ioapics)
-                       setup_IO_APIC();
+       if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
+               setup_IO_APIC();
+# ifdef CONFIG_X86_64
+       else
+               nr_ioapics = 0;
+# endif
 #endif
+
+#ifdef CONFIG_X86_64
+       setup_boot_APIC_clock();
+       check_nmi_watchdog();
+#else
        setup_boot_clock();
+#endif
 
        return 0;
 }
@@ -1348,8 +1680,11 @@ int __init APIC_init_uniprocessor(void)
  */
 void smp_spurious_interrupt(struct pt_regs *regs)
 {
-       unsigned long v;
+       u32 v;
 
+#ifdef CONFIG_X86_64
+       exit_idle();
+#endif
        irq_enter();
        /*
         * Check if this really is a spurious interrupt and ACK it
@@ -1360,10 +1695,14 @@ void smp_spurious_interrupt(struct pt_regs *regs)
        if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
                ack_APIC_irq();
 
+#ifdef CONFIG_X86_64
+       add_pda(irq_spurious_count, 1);
+#else
        /* see sw-dev-man vol 3, chapter 7.4.13.5 */
        printk(KERN_INFO "spurious APIC interrupt on CPU#%d, "
               "should never happen.\n", smp_processor_id());
        __get_cpu_var(irq_stat).irq_spurious_count++;
+#endif
        irq_exit();
 }
 
@@ -1372,8 +1711,11 @@ void smp_spurious_interrupt(struct pt_regs *regs)
  */
 void smp_error_interrupt(struct pt_regs *regs)
 {
-       unsigned long v, v1;
+       u32 v, v1;
 
+#ifdef CONFIG_X86_64
+       exit_idle();
+#endif
        irq_enter();
        /* First tickle the hardware, only then report what went on. -- REW */
        v = apic_read(APIC_ESR);
@@ -1392,7 +1734,7 @@ void smp_error_interrupt(struct pt_regs *regs)
           6: Received illegal vector
           7: Illegal register address
        */
-       printk(KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n",
+       printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n",
                smp_processor_id(), v , v1);
        irq_exit();
 }
@@ -1565,6 +1907,13 @@ void __cpuinit generic_processor_info(int apicid, int version)
        cpu_set(cpu, cpu_present_map);
 }
 
+#ifdef CONFIG_X86_64
+int hard_smp_processor_id(void)
+{
+       return read_apic_id();
+}
+#endif
+
 /*
  * Power management
  */
@@ -1640,7 +1989,7 @@ static int lapic_resume(struct sys_device *dev)
 
        local_irq_save(flags);
 
-#ifdef CONFIG_X86_64
+#ifdef HAVE_X2APIC
        if (x2apic)
                enable_x2apic();
        else
@@ -1702,7 +2051,7 @@ static struct sys_device device_lapic = {
        .cls    = &lapic_sysclass,
 };
 
-static void __devinit apic_pm_activate(void)
+static void __cpuinit apic_pm_activate(void)
 {
        apic_pm_state.active = 1;
 }
@@ -1728,16 +2077,87 @@ static void apic_pm_activate(void) { }
 
 #endif /* CONFIG_PM */
 
+#ifdef CONFIG_X86_64
 /*
- * APIC command line parameters
+ * apic_is_clustered_box() -- Check if we can expect good TSC
+ *
+ * Thus far, the major user of this is IBM's Summit2 series:
+ *
+ * Clustered boxes may have unsynced TSC problems if they are
+ * multi-chassis. Use available data to take a good guess.
+ * If in doubt, go HPET.
  */
-static int __init parse_lapic(char *arg)
+__cpuinit int apic_is_clustered_box(void)
 {
-       force_enable_local_apic = 1;
-       return 0;
+       int i, clusters, zeros;
+       unsigned id;
+       u16 *bios_cpu_apicid;
+       DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS);
+
+       /*
+        * there is not this kind of box with AMD CPU yet.
+        * Some AMD box with quadcore cpu and 8 sockets apicid
+        * will be [4, 0x23] or [8, 0x27] could be thought to
+        * vsmp box still need checking...
+        */
+       if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box())
+               return 0;
+
+       bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
+       bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
+
+       for (i = 0; i < NR_CPUS; i++) {
+               /* are we being called early in kernel startup? */
+               if (bios_cpu_apicid) {
+                       id = bios_cpu_apicid[i];
+               }
+               else if (i < nr_cpu_ids) {
+                       if (cpu_present(i))
+                               id = per_cpu(x86_bios_cpu_apicid, i);
+                       else
+                               continue;
+               }
+               else
+                       break;
+
+               if (id != BAD_APICID)
+                       __set_bit(APIC_CLUSTERID(id), clustermap);
+       }
+
+       /* Problem:  Partially populated chassis may not have CPUs in some of
+        * the APIC clusters they have been allocated.  Only present CPUs have
+        * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap.
+        * Since clusters are allocated sequentially, count zeros only if
+        * they are bounded by ones.
+        */
+       clusters = 0;
+       zeros = 0;
+       for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
+               if (test_bit(i, clustermap)) {
+                       clusters += 1 + zeros;
+                       zeros = 0;
+               } else
+                       ++zeros;
+       }
+
+       /* ScaleMP vSMPowered boxes have one cluster per board and TSCs are
+        * not guaranteed to be synced between boards
+        */
+       if (is_vsmp_box() && clusters > 1)
+               return 1;
+
+       /*
+        * If clusters > 2, then should be multi-chassis.
+        * May have to revisit this when multi-core + hyperthreaded CPUs come
+        * out, but AFAIK this will work even for them.
+        */
+       return (clusters > 2);
 }
-early_param("lapic", parse_lapic);
+#endif
 
+/*
+ * APIC command line parameters
+ */
 static int __init setup_disableapic(char *arg)
 {
        disable_apic = 1;
@@ -1779,7 +2199,6 @@ static int __init apic_set_verbosity(char *arg)
        if (!arg)  {
 #ifdef CONFIG_X86_64
                skip_ioapic_setup = 0;
-               ioapic_force = 1;
                return 0;
 #endif
                return -EINVAL;
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
deleted file mode 100644 (file)
index 94ddb69..0000000
+++ /dev/null
@@ -1,1848 +0,0 @@
-/*
- *     Local APIC handling, local APIC timers
- *
- *     (c) 1999, 2000 Ingo Molnar <mingo@redhat.com>
- *
- *     Fixes
- *     Maciej W. Rozycki       :       Bits for genuine 82489DX APICs;
- *                                     thanks to Eric Gilmore
- *                                     and Rolf G. Tews
- *                                     for testing these extensively.
- *     Maciej W. Rozycki       :       Various updates and fixes.
- *     Mikael Pettersson       :       Power Management for UP-APIC.
- *     Pavel Machek and
- *     Mikael Pettersson       :       PM converted to driver model.
- */
-
-#include <linux/init.h>
-
-#include <linux/mm.h>
-#include <linux/delay.h>
-#include <linux/bootmem.h>
-#include <linux/interrupt.h>
-#include <linux/mc146818rtc.h>
-#include <linux/kernel_stat.h>
-#include <linux/sysdev.h>
-#include <linux/ioport.h>
-#include <linux/clockchips.h>
-#include <linux/acpi_pmtmr.h>
-#include <linux/module.h>
-#include <linux/dmar.h>
-
-#include <asm/atomic.h>
-#include <asm/smp.h>
-#include <asm/mtrr.h>
-#include <asm/mpspec.h>
-#include <asm/hpet.h>
-#include <asm/pgalloc.h>
-#include <asm/nmi.h>
-#include <asm/idle.h>
-#include <asm/proto.h>
-#include <asm/timex.h>
-#include <asm/apic.h>
-#include <asm/i8259.h>
-
-#include <mach_ipi.h>
-#include <mach_apic.h>
-
-/* Disable local APIC timer from the kernel commandline or via dmi quirk */
-static int disable_apic_timer __cpuinitdata;
-static int apic_calibrate_pmtmr __initdata;
-int disable_apic;
-int disable_x2apic;
-int x2apic;
-
-/* x2apic enabled before OS handover */
-int x2apic_preenabled;
-
-/* Local APIC timer works in C2 */
-int local_apic_timer_c2_ok;
-EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
-
-/*
- * Debug level, exported for io_apic.c
- */
-unsigned int apic_verbosity;
-
-/* Have we found an MP table */
-int smp_found_config;
-
-static struct resource lapic_resource = {
-       .name = "Local APIC",
-       .flags = IORESOURCE_MEM | IORESOURCE_BUSY,
-};
-
-static unsigned int calibration_result;
-
-static int lapic_next_event(unsigned long delta,
-                           struct clock_event_device *evt);
-static void lapic_timer_setup(enum clock_event_mode mode,
-                             struct clock_event_device *evt);
-static void lapic_timer_broadcast(cpumask_t mask);
-static void apic_pm_activate(void);
-
-/*
- * The local apic timer can be used for any function which is CPU local.
- */
-static struct clock_event_device lapic_clockevent = {
-       .name           = "lapic",
-       .features       = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT
-                       | CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY,
-       .shift          = 32,
-       .set_mode       = lapic_timer_setup,
-       .set_next_event = lapic_next_event,
-       .broadcast      = lapic_timer_broadcast,
-       .rating         = 100,
-       .irq            = -1,
-};
-static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
-
-static unsigned long apic_phys;
-
-unsigned long mp_lapic_addr;
-
-/*
- * Get the LAPIC version
- */
-static inline int lapic_get_version(void)
-{
-       return GET_APIC_VERSION(apic_read(APIC_LVR));
-}
-
-/*
- * Check, if the APIC is integrated or a separate chip
- */
-static inline int lapic_is_integrated(void)
-{
-#ifdef CONFIG_X86_64
-       return 1;
-#else
-       return APIC_INTEGRATED(lapic_get_version());
-#endif
-}
-
-/*
- * Check, whether this is a modern or a first generation APIC
- */
-static int modern_apic(void)
-{
-       /* AMD systems use old APIC versions, so check the CPU */
-       if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
-           boot_cpu_data.x86 >= 0xf)
-               return 1;
-       return lapic_get_version() >= 0x14;
-}
-
-/*
- * Paravirt kernels also might be using these below ops. So we still
- * use generic apic_read()/apic_write(), which might be pointing to different
- * ops in PARAVIRT case.
- */
-void xapic_wait_icr_idle(void)
-{
-       while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
-               cpu_relax();
-}
-
-u32 safe_xapic_wait_icr_idle(void)
-{
-       u32 send_status;
-       int timeout;
-
-       timeout = 0;
-       do {
-               send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
-               if (!send_status)
-                       break;
-               udelay(100);
-       } while (timeout++ < 1000);
-
-       return send_status;
-}
-
-void xapic_icr_write(u32 low, u32 id)
-{
-       apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
-       apic_write(APIC_ICR, low);
-}
-
-u64 xapic_icr_read(void)
-{
-       u32 icr1, icr2;
-
-       icr2 = apic_read(APIC_ICR2);
-       icr1 = apic_read(APIC_ICR);
-
-       return icr1 | ((u64)icr2 << 32);
-}
-
-static struct apic_ops xapic_ops = {
-       .read = native_apic_mem_read,
-       .write = native_apic_mem_write,
-       .icr_read = xapic_icr_read,
-       .icr_write = xapic_icr_write,
-       .wait_icr_idle = xapic_wait_icr_idle,
-       .safe_wait_icr_idle = safe_xapic_wait_icr_idle,
-};
-
-struct apic_ops __read_mostly *apic_ops = &xapic_ops;
-EXPORT_SYMBOL_GPL(apic_ops);
-
-static void x2apic_wait_icr_idle(void)
-{
-       /* no need to wait for icr idle in x2apic */
-       return;
-}
-
-static u32 safe_x2apic_wait_icr_idle(void)
-{
-       /* no need to wait for icr idle in x2apic */
-       return 0;
-}
-
-void x2apic_icr_write(u32 low, u32 id)
-{
-       wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
-}
-
-u64 x2apic_icr_read(void)
-{
-       unsigned long val;
-
-       rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val);
-       return val;
-}
-
-static struct apic_ops x2apic_ops = {
-       .read = native_apic_msr_read,
-       .write = native_apic_msr_write,
-       .icr_read = x2apic_icr_read,
-       .icr_write = x2apic_icr_write,
-       .wait_icr_idle = x2apic_wait_icr_idle,
-       .safe_wait_icr_idle = safe_x2apic_wait_icr_idle,
-};
-
-/**
- * enable_NMI_through_LVT0 - enable NMI through local vector table 0
- */
-void __cpuinit enable_NMI_through_LVT0(void)
-{
-       unsigned int v;
-
-       /* unmask and set to NMI */
-       v = APIC_DM_NMI;
-
-       /* Level triggered for 82489DX (32bit mode) */
-       if (!lapic_is_integrated())
-               v |= APIC_LVT_LEVEL_TRIGGER;
-
-       apic_write(APIC_LVT0, v);
-}
-
-/**
- * lapic_get_maxlvt - get the maximum number of local vector table entries
- */
-int lapic_get_maxlvt(void)
-{
-       unsigned int v;
-
-       v = apic_read(APIC_LVR);
-       /*
-        * - we always have APIC integrated on 64bit mode
-        * - 82489DXs do not report # of LVT entries
-        */
-       return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2;
-}
-
-/*
- * Local APIC timer
- */
-
-/* Clock divisor */
-#ifdef CONFG_X86_64
-#define APIC_DIVISOR 1
-#else
-#define APIC_DIVISOR 16
-#endif
-
-/*
- * This function sets up the local APIC timer, with a timeout of
- * 'clocks' APIC bus clock. During calibration we actually call
- * this function twice on the boot CPU, once with a bogus timeout
- * value, second time for real. The other (noncalibrating) CPUs
- * call this function only once, with the real, calibrated value.
- *
- * We do reads before writes even if unnecessary, to get around the
- * P5 APIC double write bug.
- */
-static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
-{
-       unsigned int lvtt_value, tmp_value;
-
-       lvtt_value = LOCAL_TIMER_VECTOR;
-       if (!oneshot)
-               lvtt_value |= APIC_LVT_TIMER_PERIODIC;
-       if (!lapic_is_integrated())
-               lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
-
-       if (!irqen)
-               lvtt_value |= APIC_LVT_MASKED;
-
-       apic_write(APIC_LVTT, lvtt_value);
-
-       /*
-        * Divide PICLK by 16
-        */
-       tmp_value = apic_read(APIC_TDCR);
-       apic_write(APIC_TDCR,
-               (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) |
-               APIC_TDR_DIV_16);
-
-       if (!oneshot)
-               apic_write(APIC_TMICT, clocks / APIC_DIVISOR);
-}
-
-/*
- * Setup extended LVT, AMD specific (K8, family 10h)
- *
- * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and
- * MCE interrupts are supported. Thus MCE offset must be set to 0.
- *
- * If mask=1, the LVT entry does not generate interrupts while mask=0
- * enables the vector. See also the BKDGs.
- */
-
-#define APIC_EILVT_LVTOFF_MCE 0
-#define APIC_EILVT_LVTOFF_IBS 1
-
-static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask)
-{
-       unsigned long reg = (lvt_off << 4) + APIC_EILVT0;
-       unsigned int  v   = (mask << 16) | (msg_type << 8) | vector;
-
-       apic_write(reg, v);
-}
-
-u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask)
-{
-       setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask);
-       return APIC_EILVT_LVTOFF_MCE;
-}
-
-u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask)
-{
-       setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
-       return APIC_EILVT_LVTOFF_IBS;
-}
-EXPORT_SYMBOL_GPL(setup_APIC_eilvt_ibs);
-
-/*
- * Program the next event, relative to now
- */
-static int lapic_next_event(unsigned long delta,
-                           struct clock_event_device *evt)
-{
-       apic_write(APIC_TMICT, delta);
-       return 0;
-}
-
-/*
- * Setup the lapic timer in periodic or oneshot mode
- */
-static void lapic_timer_setup(enum clock_event_mode mode,
-                             struct clock_event_device *evt)
-{
-       unsigned long flags;
-       unsigned int v;
-
-       /* Lapic used as dummy for broadcast ? */
-       if (evt->features & CLOCK_EVT_FEAT_DUMMY)
-               return;
-
-       local_irq_save(flags);
-
-       switch (mode) {
-       case CLOCK_EVT_MODE_PERIODIC:
-       case CLOCK_EVT_MODE_ONESHOT:
-               __setup_APIC_LVTT(calibration_result,
-                                 mode != CLOCK_EVT_MODE_PERIODIC, 1);
-               break;
-       case CLOCK_EVT_MODE_UNUSED:
-       case CLOCK_EVT_MODE_SHUTDOWN:
-               v = apic_read(APIC_LVTT);
-               v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
-               apic_write(APIC_LVTT, v);
-               break;
-       case CLOCK_EVT_MODE_RESUME:
-               /* Nothing to do here */
-               break;
-       }
-
-       local_irq_restore(flags);
-}
-
-/*
- * Local APIC timer broadcast function
- */
-static void lapic_timer_broadcast(cpumask_t mask)
-{
-#ifdef CONFIG_SMP
-       send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
-#endif
-}
-
-/*
- * Setup the local APIC timer for this CPU. Copy the initilized values
- * of the boot CPU and register the clock event in the framework.
- */
-static void setup_APIC_timer(void)
-{
-       struct clock_event_device *levt = &__get_cpu_var(lapic_events);
-
-       memcpy(levt, &lapic_clockevent, sizeof(*levt));
-       levt->cpumask = cpumask_of_cpu(smp_processor_id());
-
-       clockevents_register_device(levt);
-}
-
-/*
- * In this function we calibrate APIC bus clocks to the external
- * timer. Unfortunately we cannot use jiffies and the timer irq
- * to calibrate, since some later bootup code depends on getting
- * the first irq? Ugh.
- *
- * We want to do the calibration only once since we
- * want to have local timer irqs syncron. CPUs connected
- * by the same APIC bus have the very same bus frequency.
- * And we want to have irqs off anyways, no accidental
- * APIC irq that way.
- */
-
-#define TICK_COUNT 100000000
-
-static int __init calibrate_APIC_clock(void)
-{
-       unsigned apic, apic_start;
-       unsigned long tsc, tsc_start;
-       int result;
-
-       local_irq_disable();
-
-       /*
-        * Put whatever arbitrary (but long enough) timeout
-        * value into the APIC clock, we just want to get the
-        * counter running for calibration.
-        *
-        * No interrupt enable !
-        */
-       __setup_APIC_LVTT(250000000, 0, 0);
-
-       apic_start = apic_read(APIC_TMCCT);
-#ifdef CONFIG_X86_PM_TIMER
-       if (apic_calibrate_pmtmr && pmtmr_ioport) {
-               pmtimer_wait(5000);  /* 5ms wait */
-               apic = apic_read(APIC_TMCCT);
-               result = (apic_start - apic) * 1000L / 5;
-       } else
-#endif
-       {
-               rdtscll(tsc_start);
-
-               do {
-                       apic = apic_read(APIC_TMCCT);
-                       rdtscll(tsc);
-               } while ((tsc - tsc_start) < TICK_COUNT &&
-                               (apic_start - apic) < TICK_COUNT);
-
-               result = (apic_start - apic) * 1000L * tsc_khz /
-                                       (tsc - tsc_start);
-       }
-
-       local_irq_enable();
-
-       printk(KERN_DEBUG "APIC timer calibration result %d\n", result);
-
-       printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n",
-               result / 1000 / 1000, result / 1000 % 1000);
-
-       /* Calculate the scaled math multiplication factor */
-       lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC,
-                                      lapic_clockevent.shift);
-       lapic_clockevent.max_delta_ns =
-               clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
-       lapic_clockevent.min_delta_ns =
-               clockevent_delta2ns(0xF, &lapic_clockevent);
-
-       calibration_result = (result * APIC_DIVISOR) / HZ;
-
-       /*
-        * Do a sanity check on the APIC calibration result
-        */
-       if (calibration_result < (1000000 / HZ)) {
-               printk(KERN_WARNING
-                       "APIC frequency too slow, disabling apic timer\n");
-               return -1;
-       }
-
-       return 0;
-}
-
-/*
- * Setup the boot APIC
- *
- * Calibrate and verify the result.
- */
-void __init setup_boot_APIC_clock(void)
-{
-       /*
-        * The local apic timer can be disabled via the kernel
-        * commandline or from the CPU detection code. Register the lapic
-        * timer as a dummy clock event source on SMP systems, so the
-        * broadcast mechanism is used. On UP systems simply ignore it.
-        */
-       if (disable_apic_timer) {
-               printk(KERN_INFO "Disabling APIC timer\n");
-               /* No broadcast on UP ! */
-               if (num_possible_cpus() > 1) {
-                       lapic_clockevent.mult = 1;
-                       setup_APIC_timer();
-               }
-               return;
-       }
-
-       apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
-                   "calibrating APIC timer ...\n");
-
-       if (calibrate_APIC_clock()) {
-               /* No broadcast on UP ! */
-               if (num_possible_cpus() > 1)
-                       setup_APIC_timer();
-               return;
-       }
-
-       /*
-        * If nmi_watchdog is set to IO_APIC, we need the
-        * PIT/HPET going.  Otherwise register lapic as a dummy
-        * device.
-        */
-       if (nmi_watchdog != NMI_IO_APIC)
-               lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
-       else
-               printk(KERN_WARNING "APIC timer registered as dummy,"
-                       " due to nmi_watchdog=%d!\n", nmi_watchdog);
-
-       /* Setup the lapic or request the broadcast */
-       setup_APIC_timer();
-}
-
-void __cpuinit setup_secondary_APIC_clock(void)
-{
-       setup_APIC_timer();
-}
-
-/*
- * The guts of the apic timer interrupt
- */
-static void local_apic_timer_interrupt(void)
-{
-       int cpu = smp_processor_id();
-       struct clock_event_device *evt = &per_cpu(lapic_events, cpu);
-
-       /*
-        * Normally we should not be here till LAPIC has been initialized but
-        * in some cases like kdump, its possible that there is a pending LAPIC
-        * timer interrupt from previous kernel's context and is delivered in
-        * new kernel the moment interrupts are enabled.
-        *
-        * Interrupts are enabled early and LAPIC is setup much later, hence
-        * its possible that when we get here evt->event_handler is NULL.
-        * Check for event_handler being NULL and discard the interrupt as
-        * spurious.
-        */
-       if (!evt->event_handler) {
-               printk(KERN_WARNING
-                      "Spurious LAPIC timer interrupt on cpu %d\n", cpu);
-               /* Switch it off */
-               lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt);
-               return;
-       }
-
-       /*
-        * the NMI deadlock-detector uses this.
-        */
-#ifdef CONFIG_X86_64
-       add_pda(apic_timer_irqs, 1);
-#else
-       per_cpu(irq_stat, cpu).apic_timer_irqs++;
-#endif
-
-       evt->event_handler(evt);
-}
-
-/*
- * Local APIC timer interrupt. This is the most natural way for doing
- * local interrupts, but local timer interrupts can be emulated by
- * broadcast interrupts too. [in case the hw doesn't support APIC timers]
- *
- * [ if a single-CPU system runs an SMP kernel then we call the local
- *   interrupt as well. Thus we cannot inline the local irq ... ]
- */
-void smp_apic_timer_interrupt(struct pt_regs *regs)
-{
-       struct pt_regs *old_regs = set_irq_regs(regs);
-
-       /*
-        * NOTE! We'd better ACK the irq immediately,
-        * because timer handling can be slow.
-        */
-       ack_APIC_irq();
-       /*
-        * update_process_times() expects us to have done irq_enter().
-        * Besides, if we don't timer interrupts ignore the global
-        * interrupt lock, which is the WrongThing (tm) to do.
-        */
-       exit_idle();
-       irq_enter();
-       local_apic_timer_interrupt();
-       irq_exit();
-
-       set_irq_regs(old_regs);
-}
-
-int setup_profiling_timer(unsigned int multiplier)
-{
-       return -EINVAL;
-}
-
-
-/*
- * Local APIC start and shutdown
- */
-
-/**
- * clear_local_APIC - shutdown the local APIC
- *
- * This is called, when a CPU is disabled and before rebooting, so the state of
- * the local APIC has no dangling leftovers. Also used to cleanout any BIOS
- * leftovers during boot.
- */
-void clear_local_APIC(void)
-{
-       int maxlvt;
-       u32 v;
-
-       /* APIC hasn't been mapped yet */
-       if (!apic_phys)
-               return;
-
-       maxlvt = lapic_get_maxlvt();
-       /*
-        * Masking an LVT entry can trigger a local APIC error
-        * if the vector is zero. Mask LVTERR first to prevent this.
-        */
-       if (maxlvt >= 3) {
-               v = ERROR_APIC_VECTOR; /* any non-zero vector will do */
-               apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
-       }
-       /*
-        * Careful: we have to set masks only first to deassert
-        * any level-triggered sources.
-        */
-       v = apic_read(APIC_LVTT);
-       apic_write(APIC_LVTT, v | APIC_LVT_MASKED);
-       v = apic_read(APIC_LVT0);
-       apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
-       v = apic_read(APIC_LVT1);
-       apic_write(APIC_LVT1, v | APIC_LVT_MASKED);
-       if (maxlvt >= 4) {
-               v = apic_read(APIC_LVTPC);
-               apic_write(APIC_LVTPC, v | APIC_LVT_MASKED);
-       }
-
-       /* lets not touch this if we didn't frob it */
-#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(X86_MCE_INTEL)
-       if (maxlvt >= 5) {
-               v = apic_read(APIC_LVTTHMR);
-               apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
-       }
-#endif
-       /*
-        * Clean APIC state for other OSs:
-        */
-       apic_write(APIC_LVTT, APIC_LVT_MASKED);
-       apic_write(APIC_LVT0, APIC_LVT_MASKED);
-       apic_write(APIC_LVT1, APIC_LVT_MASKED);
-       if (maxlvt >= 3)
-               apic_write(APIC_LVTERR, APIC_LVT_MASKED);
-       if (maxlvt >= 4)
-               apic_write(APIC_LVTPC, APIC_LVT_MASKED);
-
-       /* Integrated APIC (!82489DX) ? */
-       if (lapic_is_integrated()) {
-               if (maxlvt > 3)
-                       /* Clear ESR due to Pentium errata 3AP and 11AP */
-                       apic_write(APIC_ESR, 0);
-               apic_read(APIC_ESR);
-       }
-}
-
-/**
- * disable_local_APIC - clear and disable the local APIC
- */
-void disable_local_APIC(void)
-{
-       unsigned int value;
-
-       clear_local_APIC();
-
-       /*
-        * Disable APIC (implies clearing of registers
-        * for 82489DX!).
-        */
-       value = apic_read(APIC_SPIV);
-       value &= ~APIC_SPIV_APIC_ENABLED;
-       apic_write(APIC_SPIV, value);
-
-#ifdef CONFIG_X86_32
-       /*
-        * When LAPIC was disabled by the BIOS and enabled by the kernel,
-        * restore the disabled state.
-        */
-       if (enabled_via_apicbase) {
-               unsigned int l, h;
-
-               rdmsr(MSR_IA32_APICBASE, l, h);
-               l &= ~MSR_IA32_APICBASE_ENABLE;
-               wrmsr(MSR_IA32_APICBASE, l, h);
-       }
-#endif
-}
-
-/*
- * If Linux enabled the LAPIC against the BIOS default disable it down before
- * re-entering the BIOS on shutdown.  Otherwise the BIOS may get confused and
- * not power-off.  Additionally clear all LVT entries before disable_local_APIC
- * for the case where Linux didn't enable the LAPIC.
- */
-void lapic_shutdown(void)
-{
-       unsigned long flags;
-
-       if (!cpu_has_apic)
-               return;
-
-       local_irq_save(flags);
-
-#ifdef CONFIG_X86_32
-       if (!enabled_via_apicbase)
-               clear_local_APIC();
-       else
-#endif
-               disable_local_APIC();
-
-
-       local_irq_restore(flags);
-}
-
-/*
- * This is to verify that we're looking at a real local APIC.
- * Check these against your board if the CPUs aren't getting
- * started for no apparent reason.
- */
-int __init verify_local_APIC(void)
-{
-       unsigned int reg0, reg1;
-
-       /*
-        * The version register is read-only in a real APIC.
-        */
-       reg0 = apic_read(APIC_LVR);
-       apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0);
-       apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK);
-       reg1 = apic_read(APIC_LVR);
-       apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1);
-
-       /*
-        * The two version reads above should print the same
-        * numbers.  If the second one is different, then we
-        * poke at a non-APIC.
-        */
-       if (reg1 != reg0)
-               return 0;
-
-       /*
-        * Check if the version looks reasonably.
-        */
-       reg1 = GET_APIC_VERSION(reg0);
-       if (reg1 == 0x00 || reg1 == 0xff)
-               return 0;
-       reg1 = lapic_get_maxlvt();
-       if (reg1 < 0x02 || reg1 == 0xff)
-               return 0;
-
-       /*
-        * The ID register is read/write in a real APIC.
-        */
-       reg0 = apic_read(APIC_ID);
-       apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
-       apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
-       reg1 = apic_read(APIC_ID);
-       apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
-       apic_write(APIC_ID, reg0);
-       if (reg1 != (reg0 ^ APIC_ID_MASK))
-               return 0;
-
-       /*
-        * The next two are just to see if we have sane values.
-        * They're only really relevant if we're in Virtual Wire
-        * compatibility mode, but most boxes are anymore.
-        */
-       reg0 = apic_read(APIC_LVT0);
-       apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0);
-       reg1 = apic_read(APIC_LVT1);
-       apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1);
-
-       return 1;
-}
-
-/**
- * sync_Arb_IDs - synchronize APIC bus arbitration IDs
- */
-void __init sync_Arb_IDs(void)
-{
-       /*
-        * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not
-        * needed on AMD.
-        */
-       if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
-               return;
-
-       /*
-        * Wait for idle.
-        */
-       apic_wait_icr_idle();
-
-       apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
-       apic_write(APIC_ICR, APIC_DEST_ALLINC |
-                       APIC_INT_LEVELTRIG | APIC_DM_INIT);
-}
-
-/*
- * An initial setup of the virtual wire mode.
- */
-void __init init_bsp_APIC(void)
-{
-       unsigned int value;
-
-       /*
-        * Don't do the setup now if we have a SMP BIOS as the
-        * through-I/O-APIC virtual wire mode might be active.
-        */
-       if (smp_found_config || !cpu_has_apic)
-               return;
-
-       /*
-        * Do not trust the local APIC being empty at bootup.
-        */
-       clear_local_APIC();
-
-       /*
-        * Enable APIC.
-        */
-       value = apic_read(APIC_SPIV);
-       value &= ~APIC_VECTOR_MASK;
-       value |= APIC_SPIV_APIC_ENABLED;
-
-#ifdef CONFIG_X86_32
-       /* This bit is reserved on P4/Xeon and should be cleared */
-       if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
-           (boot_cpu_data.x86 == 15))
-               value &= ~APIC_SPIV_FOCUS_DISABLED;
-       else
-#endif
-               value |= APIC_SPIV_FOCUS_DISABLED;
-       value |= SPURIOUS_APIC_VECTOR;
-       apic_write(APIC_SPIV, value);
-
-       /*
-        * Set up the virtual wire mode.
-        */
-       apic_write(APIC_LVT0, APIC_DM_EXTINT);
-       value = APIC_DM_NMI;
-       if (!lapic_is_integrated())             /* 82489DX */
-               value |= APIC_LVT_LEVEL_TRIGGER;
-       apic_write(APIC_LVT1, value);
-}
-
-static void __cpuinit lapic_setup_esr(void)
-{
-       unsigned long oldvalue, value, maxlvt;
-       if (lapic_is_integrated() && !esr_disable) {
-               if (esr_disable) {
-                       /*
-                        * Something untraceable is creating bad interrupts on
-                        * secondary quads ... for the moment, just leave the
-                        * ESR disabled - we can't do anything useful with the
-                        * errors anyway - mbligh
-                        */
-                       printk(KERN_INFO "Leaving ESR disabled.\n");
-                       return;
-               }
-               /* !82489DX */
-               maxlvt = lapic_get_maxlvt();
-               if (maxlvt > 3)         /* Due to the Pentium erratum 3AP. */
-                       apic_write(APIC_ESR, 0);
-               oldvalue = apic_read(APIC_ESR);
-
-               /* enables sending errors */
-               value = ERROR_APIC_VECTOR;
-               apic_write(APIC_LVTERR, value);
-               /*
-                * spec says clear errors after enabling vector.
-                */
-               if (maxlvt > 3)
-                       apic_write(APIC_ESR, 0);
-               value = apic_read(APIC_ESR);
-               if (value != oldvalue)
-                       apic_printk(APIC_VERBOSE, "ESR value before enabling "
-                               "vector: 0x%08lx  after: 0x%08lx\n",
-                               oldvalue, value);
-       } else {
-               printk(KERN_INFO "No ESR for 82489DX.\n");
-       }
-}
-
-
-/**
- * setup_local_APIC - setup the local APIC
- */
-void __cpuinit setup_local_APIC(void)
-{
-       unsigned int value;
-       int i, j;
-
-       preempt_disable();
-       value = apic_read(APIC_LVR);
-
-       BUILD_BUG_ON((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f);
-
-       /*
-        * Double-check whether this APIC is really registered.
-        * This is meaningless in clustered apic mode, so we skip it.
-        */
-       if (!apic_id_registered())
-               BUG();
-
-       /*
-        * Intel recommends to set DFR, LDR and TPR before enabling
-        * an APIC.  See e.g. "AP-388 82489DX User's Manual" (Intel
-        * document number 292116).  So here it goes...
-        */
-       init_apic_ldr();
-
-       /*
-        * Set Task Priority to 'accept all'. We never change this
-        * later on.
-        */
-       value = apic_read(APIC_TASKPRI);
-       value &= ~APIC_TPRI_MASK;
-       apic_write(APIC_TASKPRI, value);
-
-       /*
-        * After a crash, we no longer service the interrupts and a pending
-        * interrupt from previous kernel might still have ISR bit set.
-        *
-        * Most probably by now CPU has serviced that pending interrupt and
-        * it might not have done the ack_APIC_irq() because it thought,
-        * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it
-        * does not clear the ISR bit and cpu thinks it has already serivced
-        * the interrupt. Hence a vector might get locked. It was noticed
-        * for timer irq (vector 0x31). Issue an extra EOI to clear ISR.
-        */
-       for (i = APIC_ISR_NR - 1; i >= 0; i--) {
-               value = apic_read(APIC_ISR + i*0x10);
-               for (j = 31; j >= 0; j--) {
-                       if (value & (1<<j))
-                               ack_APIC_irq();
-               }
-       }
-
-       /*
-        * Now that we are all set up, enable the APIC
-        */
-       value = apic_read(APIC_SPIV);
-       value &= ~APIC_VECTOR_MASK;
-       /*
-        * Enable APIC
-        */
-       value |= APIC_SPIV_APIC_ENABLED;
-
-       /* We always use processor focus */
-
-       /*
-        * Set spurious IRQ vector
-        */
-       value |= SPURIOUS_APIC_VECTOR;
-       apic_write(APIC_SPIV, value);
-
-       /*
-        * Set up LVT0, LVT1:
-        *
-        * set up through-local-APIC on the BP's LINT0. This is not
-        * strictly necessary in pure symmetric-IO mode, but sometimes
-        * we delegate interrupts to the 8259A.
-        */
-       /*
-        * TODO: set up through-local-APIC from through-I/O-APIC? --macro
-        */
-       value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
-       if (!smp_processor_id() && !value) {
-               value = APIC_DM_EXTINT;
-               apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n",
-                           smp_processor_id());
-       } else {
-               value = APIC_DM_EXTINT | APIC_LVT_MASKED;
-               apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n",
-                           smp_processor_id());
-       }
-       apic_write(APIC_LVT0, value);
-
-       /*
-        * only the BP should see the LINT1 NMI signal, obviously.
-        */
-       if (!smp_processor_id())
-               value = APIC_DM_NMI;
-       else
-               value = APIC_DM_NMI | APIC_LVT_MASKED;
-       apic_write(APIC_LVT1, value);
-       preempt_enable();
-}
-
-void __cpuinit end_local_APIC_setup(void)
-{
-       lapic_setup_esr();
-
-#ifdef CONFIG_X86_32
-       {
-               unsigned int value;
-               /* Disable the local apic timer */
-               value = apic_read(APIC_LVTT);
-               value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
-               apic_write(APIC_LVTT, value);
-       }
-#endif
-
-       setup_apic_nmi_watchdog(NULL);
-       apic_pm_activate();
-}
-
-void check_x2apic(void)
-{
-       int msr, msr2;
-
-       rdmsr(MSR_IA32_APICBASE, msr, msr2);
-
-       if (msr & X2APIC_ENABLE) {
-               printk("x2apic enabled by BIOS, switching to x2apic ops\n");
-               x2apic_preenabled = x2apic = 1;
-               apic_ops = &x2apic_ops;
-       }
-}
-
-void enable_x2apic(void)
-{
-       int msr, msr2;
-
-       rdmsr(MSR_IA32_APICBASE, msr, msr2);
-       if (!(msr & X2APIC_ENABLE)) {
-               printk("Enabling x2apic\n");
-               wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
-       }
-}
-
-void enable_IR_x2apic(void)
-{
-#ifdef CONFIG_INTR_REMAP
-       int ret;
-       unsigned long flags;
-
-       if (!cpu_has_x2apic)
-               return;
-
-       if (!x2apic_preenabled && disable_x2apic) {
-               printk(KERN_INFO
-                      "Skipped enabling x2apic and Interrupt-remapping "
-                      "because of nox2apic\n");
-               return;
-       }
-
-       if (x2apic_preenabled && disable_x2apic)
-               panic("Bios already enabled x2apic, can't enforce nox2apic");
-
-       if (!x2apic_preenabled && skip_ioapic_setup) {
-               printk(KERN_INFO
-                      "Skipped enabling x2apic and Interrupt-remapping "
-                      "because of skipping io-apic setup\n");
-               return;
-       }
-
-       ret = dmar_table_init();
-       if (ret) {
-               printk(KERN_INFO
-                      "dmar_table_init() failed with %d:\n", ret);
-
-               if (x2apic_preenabled)
-                       panic("x2apic enabled by bios. But IR enabling failed");
-               else
-                       printk(KERN_INFO
-                              "Not enabling x2apic,Intr-remapping\n");
-               return;
-       }
-
-       local_irq_save(flags);
-       mask_8259A();
-       save_mask_IO_APIC_setup();
-
-       ret = enable_intr_remapping(1);
-
-       if (ret && x2apic_preenabled) {
-               local_irq_restore(flags);
-               panic("x2apic enabled by bios. But IR enabling failed");
-       }
-
-       if (ret)
-               goto end;
-
-       if (!x2apic) {
-               x2apic = 1;
-               apic_ops = &x2apic_ops;
-               enable_x2apic();
-       }
-end:
-       if (ret)
-               /*
-                * IR enabling failed
-                */
-               restore_IO_APIC_setup();
-       else
-               reinit_intr_remapped_IO_APIC(x2apic_preenabled);
-
-       unmask_8259A();
-       local_irq_restore(flags);
-
-       if (!ret) {
-               if (!x2apic_preenabled)
-                       printk(KERN_INFO
-                              "Enabled x2apic and interrupt-remapping\n");
-               else
-                       printk(KERN_INFO
-                              "Enabled Interrupt-remapping\n");
-       } else
-               printk(KERN_ERR
-                      "Failed to enable Interrupt-remapping and x2apic\n");
-#else
-       if (!cpu_has_x2apic)
-               return;
-
-       if (x2apic_preenabled)
-               panic("x2apic enabled prior OS handover,"
-                     " enable CONFIG_INTR_REMAP");
-
-       printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping "
-              " and x2apic\n");
-#endif
-
-       return;
-}
-
-/*
- * Detect and enable local APICs on non-SMP boards.
- * Original code written by Keir Fraser.
- * On AMD64 we trust the BIOS - if it says no APIC it is likely
- * not correctly set up (usually the APIC timer won't work etc.)
- */
-static int __init detect_init_APIC(void)
-{
-       if (!cpu_has_apic) {
-               printk(KERN_INFO "No local APIC present\n");
-               return -1;
-       }
-
-       mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
-       boot_cpu_physical_apicid = 0;
-       return 0;
-}
-
-void __init early_init_lapic_mapping(void)
-{
-       unsigned long phys_addr;
-
-       /*
-        * If no local APIC can be found then go out
-        * : it means there is no mpatable and MADT
-        */
-       if (!smp_found_config)
-               return;
-
-       phys_addr = mp_lapic_addr;
-
-       set_fixmap_nocache(FIX_APIC_BASE, phys_addr);
-       apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
-                   APIC_BASE, phys_addr);
-
-       /*
-        * Fetch the APIC ID of the BSP in case we have a
-        * default configuration (or the MP table is broken).
-        */
-       boot_cpu_physical_apicid = read_apic_id();
-}
-
-/**
- * init_apic_mappings - initialize APIC mappings
- */
-void __init init_apic_mappings(void)
-{
-       if (x2apic) {
-               boot_cpu_physical_apicid = read_apic_id();
-               return;
-       }
-
-       /*
-        * If no local APIC can be found then set up a fake all
-        * zeroes page to simulate the local APIC and another
-        * one for the IO-APIC.
-        */
-       if (!smp_found_config && detect_init_APIC()) {
-               apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE);
-               apic_phys = __pa(apic_phys);
-       } else
-               apic_phys = mp_lapic_addr;
-
-       set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
-       apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
-                               APIC_BASE, apic_phys);
-
-       /*
-        * Fetch the APIC ID of the BSP in case we have a
-        * default configuration (or the MP table is broken).
-        */
-       boot_cpu_physical_apicid = read_apic_id();
-}
-
-/*
- * This initializes the IO-APIC and APIC hardware if this is
- * a UP kernel.
- */
-int apic_version[MAX_APICS];
-
-int __init APIC_init_uniprocessor(void)
-{
-       if (disable_apic) {
-               printk(KERN_INFO "Apic disabled\n");
-               return -1;
-       }
-       if (!cpu_has_apic) {
-               disable_apic = 1;
-               printk(KERN_INFO "Apic disabled by BIOS\n");
-               return -1;
-       }
-
-       enable_IR_x2apic();
-       setup_apic_routing();
-
-       verify_local_APIC();
-
-       connect_bsp_APIC();
-
-       physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
-       apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid));
-
-       setup_local_APIC();
-
-       /*
-        * Now enable IO-APICs, actually call clear_IO_APIC
-        * We need clear_IO_APIC before enabling vector on BP
-        */
-       if (!skip_ioapic_setup && nr_ioapics)
-               enable_IO_APIC();
-
-       if (!smp_found_config || skip_ioapic_setup || !nr_ioapics)
-               localise_nmi_watchdog();
-       end_local_APIC_setup();
-
-       if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
-               setup_IO_APIC();
-       else
-               nr_ioapics = 0;
-       setup_boot_APIC_clock();
-       check_nmi_watchdog();
-       return 0;
-}
-
-/*
- * Local APIC interrupts
- */
-
-/*
- * This interrupt should _never_ happen with our APIC/SMP architecture
- */
-asmlinkage void smp_spurious_interrupt(void)
-{
-       unsigned int v;
-       exit_idle();
-       irq_enter();
-       /*
-        * Check if this really is a spurious interrupt and ACK it
-        * if it is a vectored one.  Just in case...
-        * Spurious interrupts should not be ACKed.
-        */
-       v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1));
-       if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
-               ack_APIC_irq();
-
-       add_pda(irq_spurious_count, 1);
-       irq_exit();
-}
-
-/*
- * This interrupt should never happen with our APIC/SMP architecture
- */
-asmlinkage void smp_error_interrupt(void)
-{
-       unsigned int v, v1;
-
-       exit_idle();
-       irq_enter();
-       /* First tickle the hardware, only then report what went on. -- REW */
-       v = apic_read(APIC_ESR);
-       apic_write(APIC_ESR, 0);
-       v1 = apic_read(APIC_ESR);
-       ack_APIC_irq();
-       atomic_inc(&irq_err_count);
-
-       /* Here is what the APIC error bits mean:
-          0: Send CS error
-          1: Receive CS error
-          2: Send accept error
-          3: Receive accept error
-          4: Reserved
-          5: Send illegal vector
-          6: Received illegal vector
-          7: Illegal register address
-       */
-       printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n",
-               smp_processor_id(), v , v1);
-       irq_exit();
-}
-
-/**
- * connect_bsp_APIC - attach the APIC to the interrupt system
- */
-void __init connect_bsp_APIC(void)
-{
-#ifdef CONFIG_X86_32
-       if (pic_mode) {
-               /*
-                * Do not trust the local APIC being empty at bootup.
-                */
-               clear_local_APIC();
-               /*
-                * PIC mode, enable APIC mode in the IMCR, i.e.  connect BSP's
-                * local APIC to INT and NMI lines.
-                */
-               apic_printk(APIC_VERBOSE, "leaving PIC mode, "
-                               "enabling APIC mode.\n");
-               outb(0x70, 0x22);
-               outb(0x01, 0x23);
-       }
-#endif
-       enable_apic_mode();
-}
-
-/**
- * disconnect_bsp_APIC - detach the APIC from the interrupt system
- * @virt_wire_setup:   indicates, whether virtual wire mode is selected
- *
- * Virtual wire mode is necessary to deliver legacy interrupts even when the
- * APIC is disabled.
- */
-void disconnect_bsp_APIC(int virt_wire_setup)
-{
-       unsigned int value;
-
-#ifdef CONFIG_X86_32
-       if (pic_mode) {
-               /*
-                * Put the board back into PIC mode (has an effect only on
-                * certain older boards).  Note that APIC interrupts, including
-                * IPIs, won't work beyond this point!  The only exception are
-                * INIT IPIs.
-                */
-               apic_printk(APIC_VERBOSE, "disabling APIC mode, "
-                               "entering PIC mode.\n");
-               outb(0x70, 0x22);
-               outb(0x00, 0x23);
-               return;
-       }
-#endif
-
-       /* Go back to Virtual Wire compatibility mode */
-
-       /* For the spurious interrupt use vector F, and enable it */
-       value = apic_read(APIC_SPIV);
-       value &= ~APIC_VECTOR_MASK;
-       value |= APIC_SPIV_APIC_ENABLED;
-       value |= 0xf;
-       apic_write(APIC_SPIV, value);
-
-       if (!virt_wire_setup) {
-               /*
-                * For LVT0 make it edge triggered, active high,
-                * external and enabled
-                */
-               value = apic_read(APIC_LVT0);
-               value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
-                       APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
-                       APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
-               value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
-               value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
-               apic_write(APIC_LVT0, value);
-       } else {
-               /* Disable LVT0 */
-               apic_write(APIC_LVT0, APIC_LVT_MASKED);
-       }
-
-       /*
-        * For LVT1 make it edge triggered, active high,
-        * nmi and enabled
-        */
-       value = apic_read(APIC_LVT1);
-       value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
-                       APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
-                       APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
-       value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
-       value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
-       apic_write(APIC_LVT1, value);
-}
-
-void __cpuinit generic_processor_info(int apicid, int version)
-{
-       int cpu;
-       cpumask_t tmp_map;
-
-       /*
-        * Validate version
-        */
-       if (version == 0x0) {
-               printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! "
-                               "fixing up to 0x10. (tell your hw vendor)\n",
-                               version);
-               version = 0x10;
-       }
-       apic_version[apicid] = version;
-
-       if (num_processors >= NR_CPUS) {
-               printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
-                       "  Processor ignored.\n", NR_CPUS);
-               return;
-       }
-
-       num_processors++;
-       cpus_complement(tmp_map, cpu_present_map);
-       cpu = first_cpu(tmp_map);
-
-       physid_set(apicid, phys_cpu_present_map);
-       if (apicid == boot_cpu_physical_apicid) {
-               /*
-                * x86_bios_cpu_apicid is required to have processors listed
-                * in same order as logical cpu numbers. Hence the first
-                * entry is BSP, and so on.
-                */
-               cpu = 0;
-       }
-       if (apicid > max_physical_apicid)
-               max_physical_apicid = apicid;
-
-#ifdef CONFIG_X86_32
-       /*
-        * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
-        * but we need to work other dependencies like SMP_SUSPEND etc
-        * before this can be done without some confusion.
-        * if (CPU_HOTPLUG_ENABLED || num_processors > 8)
-        *       - Ashok Raj <ashok.raj@intel.com>
-        */
-       if (max_physical_apicid >= 8) {
-               switch (boot_cpu_data.x86_vendor) {
-               case X86_VENDOR_INTEL:
-                       if (!APIC_XAPIC(version)) {
-                               def_to_bigsmp = 0;
-                               break;
-                       }
-                       /* If P4 and above fall through */
-               case X86_VENDOR_AMD:
-                       def_to_bigsmp = 1;
-               }
-       }
-#endif
-
-#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64)
-       /* are we being called early in kernel startup? */
-       if (early_per_cpu_ptr(x86_cpu_to_apicid)) {
-               u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
-               u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
-
-               cpu_to_apicid[cpu] = apicid;
-               bios_cpu_apicid[cpu] = apicid;
-       } else {
-               per_cpu(x86_cpu_to_apicid, cpu) = apicid;
-               per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
-       }
-#endif
-
-       cpu_set(cpu, cpu_possible_map);
-       cpu_set(cpu, cpu_present_map);
-}
-
-int hard_smp_processor_id(void)
-{
-       return read_apic_id();
-}
-
-/*
- * Power management
- */
-#ifdef CONFIG_PM
-
-static struct {
-       /*
-        * 'active' is true if the local APIC was enabled by us and
-        * not the BIOS; this signifies that we are also responsible
-        * for disabling it before entering apm/acpi suspend
-        */
-       int active;
-       /* r/w apic fields */
-       unsigned int apic_id;
-       unsigned int apic_taskpri;
-       unsigned int apic_ldr;
-       unsigned int apic_dfr;
-       unsigned int apic_spiv;
-       unsigned int apic_lvtt;
-       unsigned int apic_lvtpc;
-       unsigned int apic_lvt0;
-       unsigned int apic_lvt1;
-       unsigned int apic_lvterr;
-       unsigned int apic_tmict;
-       unsigned int apic_tdcr;
-       unsigned int apic_thmr;
-} apic_pm_state;
-
-static int lapic_suspend(struct sys_device *dev, pm_message_t state)
-{
-       unsigned long flags;
-       int maxlvt;
-
-       if (!apic_pm_state.active)
-               return 0;
-
-       maxlvt = lapic_get_maxlvt();
-
-       apic_pm_state.apic_id = apic_read(APIC_ID);
-       apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
-       apic_pm_state.apic_ldr = apic_read(APIC_LDR);
-       apic_pm_state.apic_dfr = apic_read(APIC_DFR);
-       apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
-       apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
-       if (maxlvt >= 4)
-               apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
-       apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
-       apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
-       apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
-       apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
-       apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
-#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
-       if (maxlvt >= 5)
-               apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
-#endif
-
-       local_irq_save(flags);
-       disable_local_APIC();
-       local_irq_restore(flags);
-       return 0;
-}
-
-static int lapic_resume(struct sys_device *dev)
-{
-       unsigned int l, h;
-       unsigned long flags;
-       int maxlvt;
-
-       if (!apic_pm_state.active)
-               return 0;
-
-       maxlvt = lapic_get_maxlvt();
-
-       local_irq_save(flags);
-
-#ifdef CONFIG_X86_64
-       if (x2apic)
-               enable_x2apic();
-       else
-#endif
-       {
-               /*
-                * Make sure the APICBASE points to the right address
-                *
-                * FIXME! This will be wrong if we ever support suspend on
-                * SMP! We'll need to do this as part of the CPU restore!
-                */
-               rdmsr(MSR_IA32_APICBASE, l, h);
-               l &= ~MSR_IA32_APICBASE_BASE;
-               l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
-               wrmsr(MSR_IA32_APICBASE, l, h);
-       }
-
-       apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
-       apic_write(APIC_ID, apic_pm_state.apic_id);
-       apic_write(APIC_DFR, apic_pm_state.apic_dfr);
-       apic_write(APIC_LDR, apic_pm_state.apic_ldr);
-       apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri);
-       apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
-       apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
-       apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
-#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
-       if (maxlvt >= 5)
-               apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
-#endif
-       if (maxlvt >= 4)
-               apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
-       apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
-       apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
-       apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
-       apic_write(APIC_ESR, 0);
-       apic_read(APIC_ESR);
-       apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
-       apic_write(APIC_ESR, 0);
-       apic_read(APIC_ESR);
-
-       local_irq_restore(flags);
-
-       return 0;
-}
-
-/*
- * This device has no shutdown method - fully functioning local APICs
- * are needed on every CPU up until machine_halt/restart/poweroff.
- */
-
-static struct sysdev_class lapic_sysclass = {
-       .name           = "lapic",
-       .resume         = lapic_resume,
-       .suspend        = lapic_suspend,
-};
-
-static struct sys_device device_lapic = {
-       .id     = 0,
-       .cls    = &lapic_sysclass,
-};
-
-static void __cpuinit apic_pm_activate(void)
-{
-       apic_pm_state.active = 1;
-}
-
-static int __init init_lapic_sysfs(void)
-{
-       int error;
-
-       if (!cpu_has_apic)
-               return 0;
-       /* XXX: remove suspend/resume procs if !apic_pm_state.active? */
-
-       error = sysdev_class_register(&lapic_sysclass);
-       if (!error)
-               error = sysdev_register(&device_lapic);
-       return error;
-}
-device_initcall(init_lapic_sysfs);
-
-#else  /* CONFIG_PM */
-
-static void apic_pm_activate(void) { }
-
-#endif /* CONFIG_PM */
-
-/*
- * apic_is_clustered_box() -- Check if we can expect good TSC
- *
- * Thus far, the major user of this is IBM's Summit2 series:
- *
- * Clustered boxes may have unsynced TSC problems if they are
- * multi-chassis. Use available data to take a good guess.
- * If in doubt, go HPET.
- */
-__cpuinit int apic_is_clustered_box(void)
-{
-       int i, clusters, zeros;
-       unsigned id;
-       u16 *bios_cpu_apicid;
-       DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS);
-
-       /*
-        * there is not this kind of box with AMD CPU yet.
-        * Some AMD box with quadcore cpu and 8 sockets apicid
-        * will be [4, 0x23] or [8, 0x27] could be thought to
-        * vsmp box still need checking...
-        */
-       if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box())
-               return 0;
-
-       bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
-       bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
-
-       for (i = 0; i < NR_CPUS; i++) {
-               /* are we being called early in kernel startup? */
-               if (bios_cpu_apicid) {
-                       id = bios_cpu_apicid[i];
-               }
-               else if (i < nr_cpu_ids) {
-                       if (cpu_present(i))
-                               id = per_cpu(x86_bios_cpu_apicid, i);
-                       else
-                               continue;
-               }
-               else
-                       break;
-
-               if (id != BAD_APICID)
-                       __set_bit(APIC_CLUSTERID(id), clustermap);
-       }
-
-       /* Problem:  Partially populated chassis may not have CPUs in some of
-        * the APIC clusters they have been allocated.  Only present CPUs have
-        * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap.
-        * Since clusters are allocated sequentially, count zeros only if
-        * they are bounded by ones.
-        */
-       clusters = 0;
-       zeros = 0;
-       for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
-               if (test_bit(i, clustermap)) {
-                       clusters += 1 + zeros;
-                       zeros = 0;
-               } else
-                       ++zeros;
-       }
-
-       /* ScaleMP vSMPowered boxes have one cluster per board and TSCs are
-        * not guaranteed to be synced between boards
-        */
-       if (is_vsmp_box() && clusters > 1)
-               return 1;
-
-       /*
-        * If clusters > 2, then should be multi-chassis.
-        * May have to revisit this when multi-core + hyperthreaded CPUs come
-        * out, but AFAIK this will work even for them.
-        */
-       return (clusters > 2);
-}
-
-static __init int setup_nox2apic(char *str)
-{
-       disable_x2apic = 1;
-       clear_cpu_cap(&boot_cpu_data, X86_FEATURE_X2APIC);
-       return 0;
-}
-early_param("nox2apic", setup_nox2apic);
-
-
-/*
- * APIC command line parameters
- */
-static int __init setup_disableapic(char *arg)
-{
-       disable_apic = 1;
-       setup_clear_cpu_cap(X86_FEATURE_APIC);
-       return 0;
-}
-early_param("disableapic", setup_disableapic);
-
-/* same as disableapic, for compatibility */
-static int __init setup_nolapic(char *arg)
-{
-       return setup_disableapic(arg);
-}
-early_param("nolapic", setup_nolapic);
-
-static int __init parse_lapic_timer_c2_ok(char *arg)
-{
-       local_apic_timer_c2_ok = 1;
-       return 0;
-}
-early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
-
-static int __init parse_disable_apic_timer(char *arg)
-{
-       disable_apic_timer = 1;
-       return 0;
-}
-early_param("noapictimer", parse_disable_apic_timer);
-
-static int __init parse_nolapic_timer(char *arg)
-{
-       disable_apic_timer = 1;
-       return 0;
-}
-early_param("nolapic_timer", parse_nolapic_timer);
-
-static __init int setup_apicpmtimer(char *s)
-{
-       apic_calibrate_pmtmr = 1;
-       notsc_setup(NULL);
-       return 0;
-}
-__setup("apicpmtimer", setup_apicpmtimer);
-
-static int __init apic_set_verbosity(char *arg)
-{
-       if (!arg)  {
-#ifdef CONFIG_X86_64
-               skip_ioapic_setup = 0;
-               ioapic_force = 1;
-               return 0;
-#endif
-               return -EINVAL;
-       }
-
-       if (strcmp("debug", arg) == 0)
-               apic_verbosity = APIC_DEBUG;
-       else if (strcmp("verbose", arg) == 0)
-               apic_verbosity = APIC_VERBOSE;
-       else {
-               printk(KERN_WARNING "APIC Verbosity level %s not recognised"
-                       " use apic=verbose or apic=debug\n", arg);
-               return -EINVAL;
-       }
-
-       return 0;
-}
-early_param("apic", apic_set_verbosity);
-
-static int __init lapic_insert_resource(void)
-{
-       if (!apic_phys)
-               return -1;
-
-       /* Put local APIC into the resource map. */
-       lapic_resource.start = apic_phys;
-       lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
-       insert_resource(&iomem_resource, &lapic_resource);
-
-       return 0;
-}
-
-/*
- * need call insert after e820_reserve_resources()
- * that is using request_resource
- */
-late_initcall(lapic_insert_resource);
index fdd585f..f0dfe6f 100644 (file)
@@ -1,8 +1,6 @@
 /*
  * BIOS run time interface routines.
  *
- *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
- *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
  *  the Free Software Foundation; either version 2 of the License, or
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ *
+ *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
+ *  Copyright (c) Russ Anderson
  */
 
+#include <linux/efi.h>
+#include <asm/efi.h>
+#include <linux/io.h>
 #include <asm/uv/bios.h>
+#include <asm/uv/uv_hub.h>
+
+struct uv_systab uv_systab;
 
-const char *
-x86_bios_strerror(long status)
+s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
 {
-       const char *str;
-       switch (status) {
-       case  0: str = "Call completed without error";  break;
-       case -1: str = "Not implemented";               break;
-       case -2: str = "Invalid argument";              break;
-       case -3: str = "Call completed with error";     break;
-       default: str = "Unknown BIOS status code";      break;
-       }
-       return str;
+       struct uv_systab *tab = &uv_systab;
+
+       if (!tab->function)
+               /*
+                * BIOS does not support UV systab
+                */
+               return BIOS_STATUS_UNIMPLEMENTED;
+
+       return efi_call6((void *)__va(tab->function),
+                                       (u64)which, a1, a2, a3, a4, a5);
 }
 
-long
-x86_bios_freq_base(unsigned long which, unsigned long *ticks_per_second,
-                  unsigned long *drift_info)
+s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
+                                       u64 a4, u64 a5)
 {
-       struct uv_bios_retval isrv;
+       unsigned long bios_flags;
+       s64 ret;
 
-       BIOS_CALL(isrv, BIOS_FREQ_BASE, which, 0, 0, 0, 0, 0, 0);
-       *ticks_per_second = isrv.v0;
-       *drift_info = isrv.v1;
-       return isrv.status;
+       local_irq_save(bios_flags);
+       ret = uv_bios_call(which, a1, a2, a3, a4, a5);
+       local_irq_restore(bios_flags);
+
+       return ret;
 }
-EXPORT_SYMBOL_GPL(x86_bios_freq_base);
+
+s64 uv_bios_call_reentrant(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
+                                       u64 a4, u64 a5)
+{
+       s64 ret;
+
+       preempt_disable();
+       ret = uv_bios_call(which, a1, a2, a3, a4, a5);
+       preempt_enable();
+
+       return ret;
+}
+
+
+long sn_partition_id;
+EXPORT_SYMBOL_GPL(sn_partition_id);
+long uv_coherency_id;
+EXPORT_SYMBOL_GPL(uv_coherency_id);
+long uv_region_size;
+EXPORT_SYMBOL_GPL(uv_region_size);
+int uv_type;
+
+
+s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher,
+               long *region)
+{
+       s64 ret;
+       u64 v0, v1;
+       union partition_info_u part;
+
+       ret = uv_bios_call_irqsave(UV_BIOS_GET_SN_INFO, fc,
+                               (u64)(&v0), (u64)(&v1), 0, 0);
+       if (ret != BIOS_STATUS_SUCCESS)
+               return ret;
+
+       part.val = v0;
+       if (uvtype)
+               *uvtype = part.hub_version;
+       if (partid)
+               *partid = part.partition_id;
+       if (coher)
+               *coher = part.coherence_id;
+       if (region)
+               *region = part.region_size;
+       return ret;
+}
+
+
+s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second)
+{
+       return uv_bios_call(UV_BIOS_FREQ_BASE, clock_type,
+                          (u64)ticks_per_second, 0, 0, 0);
+}
+EXPORT_SYMBOL_GPL(uv_bios_freq_base);
+
+
+#ifdef CONFIG_EFI
+void uv_bios_init(void)
+{
+       struct uv_systab *tab;
+
+       if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) ||
+           (efi.uv_systab == (unsigned long)NULL)) {
+               printk(KERN_CRIT "No EFI UV System Table.\n");
+               uv_systab.function = (unsigned long)NULL;
+               return;
+       }
+
+       tab = (struct uv_systab *)ioremap(efi.uv_systab,
+                                       sizeof(struct uv_systab));
+       if (strncmp(tab->signature, "UVST", 4) != 0)
+               printk(KERN_ERR "bad signature in UV system table!");
+
+       /*
+        * Copy table to permanent spot for later use.
+        */
+       memcpy(&uv_systab, tab, sizeof(struct uv_systab));
+       iounmap(tab);
+
+       printk(KERN_INFO "EFI UV System Table Revision %d\n", tab->revision);
+}
+#else  /* !CONFIG_EFI */
+
+void uv_bios_init(void) { }
+#endif
+
diff --git a/arch/x86/kernel/cpu/.gitignore b/arch/x86/kernel/cpu/.gitignore
new file mode 100644 (file)
index 0000000..667df55
--- /dev/null
@@ -0,0 +1 @@
+capflags.c
index 32e7352..8f1e31d 100644 (file)
@@ -249,7 +249,7 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
        }
        numa_set_node(cpu, node);
 
-       printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
+       printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node);
 #endif
 }
 
index 06fcce5..b046185 100644 (file)
@@ -1,5 +1,5 @@
 /*
- *  (C) 2001-2004  Dave Jones. <davej@codemonkey.org.uk>
+ *  (C) 2001-2004  Dave Jones. <davej@redhat.com>
  *  (C) 2002  Padraig Brady. <padraig@antefacto.com>
  *
  *  Licensed under the terms of the GNU GPL License version 2.
@@ -1019,7 +1019,7 @@ MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor");
 module_param(revid_errata, int, 0644);
 MODULE_PARM_DESC(revid_errata, "Ignore CPU Revision ID");
 
-MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>");
+MODULE_AUTHOR ("Dave Jones <davej@redhat.com>");
 MODULE_DESCRIPTION ("Longhaul driver for VIA Cyrix processors.");
 MODULE_LICENSE ("GPL");
 
index b5ced80..c1ac579 100644 (file)
@@ -246,7 +246,7 @@ static void __exit powernow_k6_exit(void)
 }
 
 
-MODULE_AUTHOR("Arjan van de Ven <arjanv@redhat.com>, Dave Jones <davej@codemonkey.org.uk>, Dominik Brodowski <linux@brodo.de>");
+MODULE_AUTHOR("Arjan van de Ven, Dave Jones <davej@redhat.com>, Dominik Brodowski <linux@brodo.de>");
 MODULE_DESCRIPTION("PowerNow! driver for AMD K6-2+ / K6-3+ processors.");
 MODULE_LICENSE("GPL");
 
index 0a61159..7c7d56b 100644 (file)
@@ -1,6 +1,6 @@
 /*
  *  AMD K7 Powernow driver.
- *  (C) 2003 Dave Jones <davej@codemonkey.org.uk> on behalf of SuSE Labs.
+ *  (C) 2003 Dave Jones on behalf of SuSE Labs.
  *  (C) 2003-2004 Dave Jones <davej@redhat.com>
  *
  *  Licensed under the terms of the GNU GPL License version 2.
@@ -692,7 +692,7 @@ static void __exit powernow_exit (void)
 module_param(acpi_force,  int, 0444);
 MODULE_PARM_DESC(acpi_force, "Force ACPI to be used.");
 
-MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>");
+MODULE_AUTHOR ("Dave Jones <davej@redhat.com>");
 MODULE_DESCRIPTION ("Powernow driver for AMD K7 processors.");
 MODULE_LICENSE ("GPL");
 
index 84bb395..008d23b 100644 (file)
@@ -7,7 +7,7 @@
  *  Support : mark.langsdorf@amd.com
  *
  *  Based on the powernow-k7.c module written by Dave Jones.
- *  (C) 2003 Dave Jones <davej@codemonkey.org.uk> on behalf of SuSE Labs
+ *  (C) 2003 Dave Jones on behalf of SuSE Labs
  *  (C) 2004 Dominik Brodowski <linux@brodo.de>
  *  (C) 2004 Pavel Machek <pavel@suse.cz>
  *  Licensed under the terms of the GNU GPL License version 2.
index 191f726..04d0376 100644 (file)
@@ -431,7 +431,7 @@ static void __exit speedstep_exit(void)
 }
 
 
-MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>, Dominik Brodowski <linux@brodo.de>");
+MODULE_AUTHOR ("Dave Jones <davej@redhat.com>, Dominik Brodowski <linux@brodo.de>");
 MODULE_DESCRIPTION ("Speedstep driver for Intel mobile processors on chipsets with ICH-M southbridges.");
 MODULE_LICENSE ("GPL");
 
index 99468db..cce0b61 100644 (file)
@@ -174,7 +174,7 @@ static void __cpuinit srat_detect_node(void)
                node = first_node(node_online_map);
        numa_set_node(cpu, node);
 
-       printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
+       printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node);
 #endif
 }
 
index f390c9f..dd3af6e 100644 (file)
@@ -1,6 +1,6 @@
 /*
- * Athlon/Hammer specific Machine Check Exception Reporting
- * (C) Copyright 2002 Dave Jones <davej@codemonkey.org.uk>
+ * Athlon specific Machine Check Exception Reporting
+ * (C) Copyright 2002 Dave Jones <davej@redhat.com>
  */
 
 #include <linux/init.h>
index 774d87c..0ebf3fc 100644 (file)
@@ -1,6 +1,6 @@
 /*
  * mce.c - x86 Machine Check Exception Reporting
- * (c) 2002 Alan Cox <alan@redhat.com>, Dave Jones <davej@codemonkey.org.uk>
+ * (c) 2002 Alan Cox <alan@redhat.com>, Dave Jones <davej@redhat.com>
  */
 
 #include <linux/init.h>
index cc1fccd..a74af12 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * Non Fatal Machine Check Exception Reporting
  *
- * (C) Copyright 2002 Dave Jones. <davej@codemonkey.org.uk>
+ * (C) Copyright 2002 Dave Jones. <davej@redhat.com>
  *
  * This file contains routines to check for non-fatal MCEs every 15s
  *
index 6bff382..9abd48b 100644 (file)
@@ -17,6 +17,8 @@
 #include <linux/bitops.h>
 #include <linux/smp.h>
 #include <linux/nmi.h>
+#include <linux/kprobes.h>
+
 #include <asm/apic.h>
 #include <asm/intel_arch_perfmon.h>
 
@@ -336,7 +338,8 @@ static void single_msr_unreserve(void)
        release_perfctr_nmi(wd_ops->perfctr);
 }
 
-static void single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
+static void __kprobes
+single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
 {
        /* start the cycle over again */
        write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
@@ -401,7 +404,7 @@ static int setup_p6_watchdog(unsigned nmi_hz)
        return 1;
 }
 
-static void p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
+static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
 {
        /*
         * P6 based Pentium M need to re-unmask
@@ -605,7 +608,7 @@ static void p4_unreserve(void)
        release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
 }
 
-static void p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
+static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
 {
        unsigned dummy;
        /*
@@ -784,7 +787,7 @@ unsigned lapic_adjust_nmi_hz(unsigned hz)
        return hz;
 }
 
-int lapic_wd_event(unsigned nmi_hz)
+int __kprobes lapic_wd_event(unsigned nmi_hz)
 {
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
        u64 ctr;
index 945a31c..1119d24 100644 (file)
@@ -367,6 +367,10 @@ void __init efi_init(void)
                        efi.smbios = config_tables[i].table;
                        printk(" SMBIOS=0x%lx ", config_tables[i].table);
                } else if (!efi_guidcmp(config_tables[i].guid,
+                                       UV_SYSTEM_TABLE_GUID)) {
+                       efi.uv_systab = config_tables[i].table;
+                       printk(" UVsystab=0x%lx ", config_tables[i].table);
+               } else if (!efi_guidcmp(config_tables[i].guid,
                                        HCDP_TABLE_GUID)) {
                        efi.hcdp = config_tables[i].table;
                        printk(" HCDP=0x%lx ", config_tables[i].table);
index b21fbfa..c356423 100644 (file)
@@ -629,7 +629,7 @@ ENTRY(interrupt)
 ENTRY(irq_entries_start)
        RING0_INT_FRAME
 vector=0
-.rept NR_IRQS
+.rept NR_VECTORS
        ALIGN
  .if vector
        CFI_ADJUST_CFA_OFFSET -4
@@ -1153,20 +1153,6 @@ ENDPROC(xen_failsafe_callback)
 #ifdef CONFIG_DYNAMIC_FTRACE
 
 ENTRY(mcount)
-       pushl %eax
-       pushl %ecx
-       pushl %edx
-       movl 0xc(%esp), %eax
-       subl $MCOUNT_INSN_SIZE, %eax
-
-.globl mcount_call
-mcount_call:
-       call ftrace_stub
-
-       popl %edx
-       popl %ecx
-       popl %eax
-
        ret
 END(mcount)
 
index 1db6ce4..09e7145 100644 (file)
 #ifdef CONFIG_FTRACE
 #ifdef CONFIG_DYNAMIC_FTRACE
 ENTRY(mcount)
-
-       subq $0x38, %rsp
-       movq %rax, (%rsp)
-       movq %rcx, 8(%rsp)
-       movq %rdx, 16(%rsp)
-       movq %rsi, 24(%rsp)
-       movq %rdi, 32(%rsp)
-       movq %r8, 40(%rsp)
-       movq %r9, 48(%rsp)
-
-       movq 0x38(%rsp), %rdi
-       subq $MCOUNT_INSN_SIZE, %rdi
-
-.globl mcount_call
-mcount_call:
-       call ftrace_stub
-
-       movq 48(%rsp), %r9
-       movq 40(%rsp), %r8
-       movq 32(%rsp), %rdi
-       movq 24(%rsp), %rsi
-       movq 16(%rsp), %rdx
-       movq 8(%rsp), %rcx
-       movq (%rsp), %rax
-       addq $0x38, %rsp
-
        retq
 END(mcount)
 
index ab115cd..d073d98 100644 (file)
 
 #include <linux/spinlock.h>
 #include <linux/hardirq.h>
+#include <linux/uaccess.h>
 #include <linux/ftrace.h>
 #include <linux/percpu.h>
 #include <linux/init.h>
 #include <linux/list.h>
 
-#include <asm/alternative.h>
 #include <asm/ftrace.h>
+#include <asm/nops.h>
 
 
 /* Long is fine, even if it is only 4 bytes ;-) */
-static long *ftrace_nop;
+static unsigned long *ftrace_nop;
 
 union ftrace_code_union {
        char code[MCOUNT_INSN_SIZE];
@@ -60,11 +61,7 @@ notrace int
 ftrace_modify_code(unsigned long ip, unsigned char *old_code,
                   unsigned char *new_code)
 {
-       unsigned replaced;
-       unsigned old = *(unsigned *)old_code; /* 4 bytes */
-       unsigned new = *(unsigned *)new_code; /* 4 bytes */
-       unsigned char newch = new_code[4];
-       int faulted = 0;
+       unsigned char replaced[MCOUNT_INSN_SIZE];
 
        /*
         * Note: Due to modules and __init, code can
@@ -72,29 +69,20 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code,
         *  as well as code changing.
         *
         * No real locking needed, this code is run through
-        * kstop_machine.
+        * kstop_machine, or before SMP starts.
         */
-       asm volatile (
-               "1: lock\n"
-               "   cmpxchg %3, (%2)\n"
-               "   jnz 2f\n"
-               "   movb %b4, 4(%2)\n"
-               "2:\n"
-               ".section .fixup, \"ax\"\n"
-               "3:     movl $1, %0\n"
-               "       jmp 2b\n"
-               ".previous\n"
-               _ASM_EXTABLE(1b, 3b)
-               : "=r"(faulted), "=a"(replaced)
-               : "r"(ip), "r"(new), "c"(newch),
-                 "0"(faulted), "a"(old)
-               : "memory");
-       sync_core();
+       if (__copy_from_user_inatomic(replaced, (char __user *)ip, MCOUNT_INSN_SIZE))
+               return 1;
+
+       if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
+               return 2;
 
-       if (replaced != old && replaced != new)
-               faulted = 2;
+       WARN_ON_ONCE(__copy_to_user_inatomic((char __user *)ip, new_code,
+                                   MCOUNT_INSN_SIZE));
 
-       return faulted;
+       sync_core();
+
+       return 0;
 }
 
 notrace int ftrace_update_ftrace_func(ftrace_func_t func)
@@ -112,30 +100,76 @@ notrace int ftrace_update_ftrace_func(ftrace_func_t func)
 
 notrace int ftrace_mcount_set(unsigned long *data)
 {
-       unsigned long ip = (long)(&mcount_call);
-       unsigned long *addr = data;
-       unsigned char old[MCOUNT_INSN_SIZE], *new;
-
-       /*
-        * Replace the mcount stub with a pointer to the
-        * ip recorder function.
-        */
-       memcpy(old, &mcount_call, MCOUNT_INSN_SIZE);
-       new = ftrace_call_replace(ip, *addr);
-       *addr = ftrace_modify_code(ip, old, new);
-
+       /* mcount is initialized as a nop */
+       *data = 0;
        return 0;
 }
 
 int __init ftrace_dyn_arch_init(void *data)
 {
-       const unsigned char *const *noptable = find_nop_table();
-
-       /* This is running in kstop_machine */
-
-       ftrace_mcount_set(data);
+       extern const unsigned char ftrace_test_p6nop[];
+       extern const unsigned char ftrace_test_nop5[];
+       extern const unsigned char ftrace_test_jmp[];
+       int faulted = 0;
 
-       ftrace_nop = (unsigned long *)noptable[MCOUNT_INSN_SIZE];
+       /*
+        * There is no good nop for all x86 archs.
+        * We will default to using the P6_NOP5, but first we
+        * will test to make sure that the nop will actually
+        * work on this CPU. If it faults, we will then
+        * go to a lesser efficient 5 byte nop. If that fails
+        * we then just use a jmp as our nop. This isn't the most
+        * efficient nop, but we can not use a multi part nop
+        * since we would then risk being preempted in the middle
+        * of that nop, and if we enabled tracing then, it might
+        * cause a system crash.
+        *
+        * TODO: check the cpuid to determine the best nop.
+        */
+       asm volatile (
+               "jmp ftrace_test_jmp\n"
+               /* This code needs to stay around */
+               ".section .text, \"ax\"\n"
+               "ftrace_test_jmp:"
+               "jmp ftrace_test_p6nop\n"
+               "nop\n"
+               "nop\n"
+               "nop\n"  /* 2 byte jmp + 3 bytes */
+               "ftrace_test_p6nop:"
+               P6_NOP5
+               "jmp 1f\n"
+               "ftrace_test_nop5:"
+               ".byte 0x66,0x66,0x66,0x66,0x90\n"
+               "jmp 1f\n"
+               ".previous\n"
+               "1:"
+               ".section .fixup, \"ax\"\n"
+               "2:     movl $1, %0\n"
+               "       jmp ftrace_test_nop5\n"
+               "3:     movl $2, %0\n"
+               "       jmp 1b\n"
+               ".previous\n"
+               _ASM_EXTABLE(ftrace_test_p6nop, 2b)
+               _ASM_EXTABLE(ftrace_test_nop5, 3b)
+               : "=r"(faulted) : "0" (faulted));
+
+       switch (faulted) {
+       case 0:
+               pr_info("ftrace: converting mcount calls to 0f 1f 44 00 00\n");
+               ftrace_nop = (unsigned long *)ftrace_test_p6nop;
+               break;
+       case 1:
+               pr_info("ftrace: converting mcount calls to 66 66 66 66 90\n");
+               ftrace_nop = (unsigned long *)ftrace_test_nop5;
+               break;
+       case 2:
+               pr_info("ftrace: converting mcount calls to jmp . + 5\n");
+               ftrace_nop = (unsigned long *)ftrace_test_jmp;
+               break;
+       }
+
+       /* The return code is retured via data */
+       *(unsigned long *)data = 0;
 
        return 0;
 }
index 9eca5ba..2ec2de8 100644 (file)
@@ -179,8 +179,10 @@ static int __init physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
         * is an example).
         */
        if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
-               (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL))
+               (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) {
+               printk(KERN_DEBUG "system APIC only can use physical flat");
                return 1;
+       }
 #endif
 
        return 0;
index 33581d9..bfd5328 100644 (file)
@@ -341,12 +341,12 @@ static __init void map_mmioh_high(int max_pnode)
 
 static __init void uv_rtc_init(void)
 {
-       long status, ticks_per_sec, drift;
+       long status;
+       u64 ticks_per_sec;
 
-       status =
-           x86_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK, &ticks_per_sec,
-                                       &drift);
-       if (status != 0 || ticks_per_sec < 100000) {
+       status = uv_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK,
+                                       &ticks_per_sec);
+       if (status != BIOS_STATUS_SUCCESS || ticks_per_sec < 100000) {
                printk(KERN_WARNING
                        "unable to determine platform RTC clock frequency, "
                        "guessing.\n");
@@ -356,7 +356,22 @@ static __init void uv_rtc_init(void)
                sn_rtc_cycles_per_second = ticks_per_sec;
 }
 
-static bool uv_system_inited;
+/*
+ * Called on each cpu to initialize the per_cpu UV data area.
+ *     ZZZ hotplug not supported yet
+ */
+void __cpuinit uv_cpu_init(void)
+{
+       /* CPU 0 initilization will be done via uv_system_init. */
+       if (!uv_blade_info)
+               return;
+
+       uv_blade_info[uv_numa_blade_id()].nr_online_cpus++;
+
+       if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
+               set_x2apic_extra_bits(uv_hub_info->pnode);
+}
+
 
 void __init uv_system_init(void)
 {
@@ -412,6 +427,9 @@ void __init uv_system_init(void)
        gnode_upper = (((unsigned long)node_id.s.node_id) &
                       ~((1 << n_val) - 1)) << m_val;
 
+       uv_bios_init();
+       uv_bios_get_sn_info(0, &uv_type, &sn_partition_id,
+                           &uv_coherency_id, &uv_region_size);
        uv_rtc_init();
 
        for_each_present_cpu(cpu) {
@@ -433,7 +451,7 @@ void __init uv_system_init(void)
                uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1;
                uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
                uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
-               uv_cpu_hub_info(cpu)->coherency_domain_number = 0;/* ZZZ */
+               uv_cpu_hub_info(cpu)->coherency_domain_number = uv_coherency_id;
                uv_node_to_blade[nid] = blade;
                uv_cpu_to_blade[cpu] = blade;
                max_pnode = max(pnode, max_pnode);
@@ -448,21 +466,6 @@ void __init uv_system_init(void)
        map_mmr_high(max_pnode);
        map_config_high(max_pnode);
        map_mmioh_high(max_pnode);
-       uv_system_inited = true;
-}
 
-/*
- * Called on each cpu to initialize the per_cpu UV data area.
- *     ZZZ hotplug not supported yet
- */
-void __cpuinit uv_cpu_init(void)
-{
-       BUG_ON(!uv_system_inited);
-
-       uv_blade_info[uv_numa_blade_id()].nr_online_cpus++;
-
-       if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
-               set_x2apic_extra_bits(uv_hub_info->pnode);
+       uv_cpu_init();
 }
-
-
index acf62fc..77017e8 100644 (file)
@@ -1,29 +1,49 @@
 #include <linux/clocksource.h>
 #include <linux/clockchips.h>
+#include <linux/interrupt.h>
+#include <linux/sysdev.h>
 #include <linux/delay.h>
 #include <linux/errno.h>
 #include <linux/hpet.h>
 #include <linux/init.h>
-#include <linux/sysdev.h>
+#include <linux/cpu.h>
 #include <linux/pm.h>
+#include <linux/io.h>
 
 #include <asm/fixmap.h>
-#include <asm/hpet.h>
 #include <asm/i8253.h>
-#include <asm/io.h>
+#include <asm/hpet.h>
 
-#define HPET_MASK      CLOCKSOURCE_MASK(32)
-#define HPET_SHIFT     22
+#define HPET_MASK                      CLOCKSOURCE_MASK(32)
+#define HPET_SHIFT                     22
 
 /* FSEC = 10^-15
    NSEC = 10^-9 */
-#define FSEC_PER_NSEC  1000000L
+#define FSEC_PER_NSEC                  1000000L
+
+#define HPET_DEV_USED_BIT              2
+#define HPET_DEV_USED                  (1 << HPET_DEV_USED_BIT)
+#define HPET_DEV_VALID                 0x8
+#define HPET_DEV_FSB_CAP               0x1000
+#define HPET_DEV_PERI_CAP              0x2000
+
+#define EVT_TO_HPET_DEV(evt) container_of(evt, struct hpet_dev, evt)
 
 /*
  * HPET address is set in acpi/boot.c, when an ACPI entry exists
  */
-unsigned long hpet_address;
-static void __iomem *hpet_virt_address;
+unsigned long                          hpet_address;
+unsigned long                          hpet_num_timers;
+static void __iomem                    *hpet_virt_address;
+
+struct hpet_dev {
+       struct clock_event_device       evt;
+       unsigned int                    num;
+       int                             cpu;
+       unsigned int                    irq;
+       unsigned int                    flags;
+       char                            name[10];
+};
 
 unsigned long hpet_readl(unsigned long a)
 {
@@ -59,7 +79,7 @@ static inline void hpet_clear_mapping(void)
 static int boot_hpet_disable;
 int hpet_force_user;
 
-static int __init hpet_setup(char* str)
+static int __init hpet_setup(char *str)
 {
        if (str) {
                if (!strncmp("disable", str, 7))
@@ -80,7 +100,7 @@ __setup("nohpet", disable_hpet);
 
 static inline int is_hpet_capable(void)
 {
-       return (!boot_hpet_disable && hpet_address);
+       return !boot_hpet_disable && hpet_address;
 }
 
 /*
@@ -102,6 +122,9 @@ EXPORT_SYMBOL_GPL(is_hpet_enabled);
  * timer 0 and timer 1 in case of RTC emulation.
  */
 #ifdef CONFIG_HPET
+
+static void hpet_reserve_msi_timers(struct hpet_data *hd);
+
 static void hpet_reserve_platform_timers(unsigned long id)
 {
        struct hpet __iomem *hpet = hpet_virt_address;
@@ -111,10 +134,10 @@ static void hpet_reserve_platform_timers(unsigned long id)
 
        nrtimers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1;
 
-       memset(&hd, 0, sizeof (hd));
-       hd.hd_phys_address = hpet_address;
-       hd.hd_address = hpet;
-       hd.hd_nirqs = nrtimers;
+       memset(&hd, 0, sizeof(hd));
+       hd.hd_phys_address      = hpet_address;
+       hd.hd_address           = hpet;
+       hd.hd_nirqs             = nrtimers;
        hpet_reserve_timer(&hd, 0);
 
 #ifdef CONFIG_HPET_EMULATE_RTC
@@ -130,10 +153,12 @@ static void hpet_reserve_platform_timers(unsigned long id)
        hd.hd_irq[1] = HPET_LEGACY_RTC;
 
        for (i = 2; i < nrtimers; timer++, i++) {
-               hd.hd_irq[i] = (readl(&timer->hpet_config) & Tn_INT_ROUTE_CNF_MASK) >>
-                       Tn_INT_ROUTE_CNF_SHIFT;
+               hd.hd_irq[i] = (readl(&timer->hpet_config) &
+                       Tn_INT_ROUTE_CNF_MASK) >> Tn_INT_ROUTE_CNF_SHIFT;
        }
 
+       hpet_reserve_msi_timers(&hd);
+
        hpet_alloc(&hd);
 
 }
@@ -227,60 +252,70 @@ static void hpet_legacy_clockevent_register(void)
        printk(KERN_DEBUG "hpet clockevent registered\n");
 }
 
-static void hpet_legacy_set_mode(enum clock_event_mode mode,
-                         struct clock_event_device *evt)
+static int hpet_setup_msi_irq(unsigned int irq);
+
+static void hpet_set_mode(enum clock_event_mode mode,
+                         struct clock_event_device *evt, int timer)
 {
        unsigned long cfg, cmp, now;
        uint64_t delta;
 
-       switch(mode) {
+       switch (mode) {
        case CLOCK_EVT_MODE_PERIODIC:
-               delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * hpet_clockevent.mult;
-               delta >>= hpet_clockevent.shift;
+               delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * evt->mult;
+               delta >>= evt->shift;
                now = hpet_readl(HPET_COUNTER);
                cmp = now + (unsigned long) delta;
-               cfg = hpet_readl(HPET_T0_CFG);
+               cfg = hpet_readl(HPET_Tn_CFG(timer));
                cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC |
                       HPET_TN_SETVAL | HPET_TN_32BIT;
-               hpet_writel(cfg, HPET_T0_CFG);
+               hpet_writel(cfg, HPET_Tn_CFG(timer));
                /*
                 * The first write after writing TN_SETVAL to the
                 * config register sets the counter value, the second
                 * write sets the period.
                 */
-               hpet_writel(cmp, HPET_T0_CMP);
+               hpet_writel(cmp, HPET_Tn_CMP(timer));
                udelay(1);
-               hpet_writel((unsigned long) delta, HPET_T0_CMP);
+               hpet_writel((unsigned long) delta, HPET_Tn_CMP(timer));
                break;
 
        case CLOCK_EVT_MODE_ONESHOT:
-               cfg = hpet_readl(HPET_T0_CFG);
+               cfg = hpet_readl(HPET_Tn_CFG(timer));
                cfg &= ~HPET_TN_PERIODIC;
                cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
-               hpet_writel(cfg, HPET_T0_CFG);
+               hpet_writel(cfg, HPET_Tn_CFG(timer));
                break;
 
        case CLOCK_EVT_MODE_UNUSED:
        case CLOCK_EVT_MODE_SHUTDOWN:
-               cfg = hpet_readl(HPET_T0_CFG);
+               cfg = hpet_readl(HPET_Tn_CFG(timer));
                cfg &= ~HPET_TN_ENABLE;
-               hpet_writel(cfg, HPET_T0_CFG);
+               hpet_writel(cfg, HPET_Tn_CFG(timer));
                break;
 
        case CLOCK_EVT_MODE_RESUME:
-               hpet_enable_legacy_int();
+               if (timer == 0) {
+                       hpet_enable_legacy_int();
+               } else {
+                       struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
+                       hpet_setup_msi_irq(hdev->irq);
+                       disable_irq(hdev->irq);
+                       irq_set_affinity(hdev->irq, cpumask_of_cpu(hdev->cpu));
+                       enable_irq(hdev->irq);
+               }
                break;
        }
 }
 
-static int hpet_legacy_next_event(unsigned long delta,
-                                 struct clock_event_device *evt)
+static int hpet_next_event(unsigned long delta,
+                          struct clock_event_device *evt, int timer)
 {
        u32 cnt;
 
        cnt = hpet_readl(HPET_COUNTER);
        cnt += (u32) delta;
-       hpet_writel(cnt, HPET_T0_CMP);
+       hpet_writel(cnt, HPET_Tn_CMP(timer));
 
        /*
         * We need to read back the CMP register to make sure that
@@ -292,6 +327,347 @@ static int hpet_legacy_next_event(unsigned long delta,
        return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0;
 }
 
+static void hpet_legacy_set_mode(enum clock_event_mode mode,
+                       struct clock_event_device *evt)
+{
+       hpet_set_mode(mode, evt, 0);
+}
+
+static int hpet_legacy_next_event(unsigned long delta,
+                       struct clock_event_device *evt)
+{
+       return hpet_next_event(delta, evt, 0);
+}
+
+/*
+ * HPET MSI Support
+ */
+#ifdef CONFIG_PCI_MSI
+
+static DEFINE_PER_CPU(struct hpet_dev *, cpu_hpet_dev);
+static struct hpet_dev *hpet_devs;
+
+void hpet_msi_unmask(unsigned int irq)
+{
+       struct hpet_dev *hdev = get_irq_data(irq);
+       unsigned long cfg;
+
+       /* unmask it */
+       cfg = hpet_readl(HPET_Tn_CFG(hdev->num));
+       cfg |= HPET_TN_FSB;
+       hpet_writel(cfg, HPET_Tn_CFG(hdev->num));
+}
+
+void hpet_msi_mask(unsigned int irq)
+{
+       unsigned long cfg;
+       struct hpet_dev *hdev = get_irq_data(irq);
+
+       /* mask it */
+       cfg = hpet_readl(HPET_Tn_CFG(hdev->num));
+       cfg &= ~HPET_TN_FSB;
+       hpet_writel(cfg, HPET_Tn_CFG(hdev->num));
+}
+
+void hpet_msi_write(unsigned int irq, struct msi_msg *msg)
+{
+       struct hpet_dev *hdev = get_irq_data(irq);
+
+       hpet_writel(msg->data, HPET_Tn_ROUTE(hdev->num));
+       hpet_writel(msg->address_lo, HPET_Tn_ROUTE(hdev->num) + 4);
+}
+
+void hpet_msi_read(unsigned int irq, struct msi_msg *msg)
+{
+       struct hpet_dev *hdev = get_irq_data(irq);
+
+       msg->data = hpet_readl(HPET_Tn_ROUTE(hdev->num));
+       msg->address_lo = hpet_readl(HPET_Tn_ROUTE(hdev->num) + 4);
+       msg->address_hi = 0;
+}
+
+static void hpet_msi_set_mode(enum clock_event_mode mode,
+                               struct clock_event_device *evt)
+{
+       struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
+       hpet_set_mode(mode, evt, hdev->num);
+}
+
+static int hpet_msi_next_event(unsigned long delta,
+                               struct clock_event_device *evt)
+{
+       struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt);
+       return hpet_next_event(delta, evt, hdev->num);
+}
+
+static int hpet_setup_msi_irq(unsigned int irq)
+{
+       if (arch_setup_hpet_msi(irq)) {
+               destroy_irq(irq);
+               return -EINVAL;
+       }
+       return 0;
+}
+
+static int hpet_assign_irq(struct hpet_dev *dev)
+{
+       unsigned int irq;
+
+       irq = create_irq();
+       if (!irq)
+               return -EINVAL;
+
+       set_irq_data(irq, dev);
+
+       if (hpet_setup_msi_irq(irq))
+               return -EINVAL;
+
+       dev->irq = irq;
+       return 0;
+}
+
+static irqreturn_t hpet_interrupt_handler(int irq, void *data)
+{
+       struct hpet_dev *dev = (struct hpet_dev *)data;
+       struct clock_event_device *hevt = &dev->evt;
+
+       if (!hevt->event_handler) {
+               printk(KERN_INFO "Spurious HPET timer interrupt on HPET timer %d\n",
+                               dev->num);
+               return IRQ_HANDLED;
+       }
+
+       hevt->event_handler(hevt);
+       return IRQ_HANDLED;
+}
+
+static int hpet_setup_irq(struct hpet_dev *dev)
+{
+
+       if (request_irq(dev->irq, hpet_interrupt_handler,
+                       IRQF_SHARED|IRQF_NOBALANCING, dev->name, dev))
+               return -1;
+
+       disable_irq(dev->irq);
+       irq_set_affinity(dev->irq, cpumask_of_cpu(dev->cpu));
+       enable_irq(dev->irq);
+
+       printk(KERN_DEBUG "hpet: %s irq %d for MSI\n",
+                        dev->name, dev->irq);
+
+       return 0;
+}
+
+/* This should be called in specific @cpu */
+static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu)
+{
+       struct clock_event_device *evt = &hdev->evt;
+       uint64_t hpet_freq;
+
+       WARN_ON(cpu != smp_processor_id());
+       if (!(hdev->flags & HPET_DEV_VALID))
+               return;
+
+       if (hpet_setup_msi_irq(hdev->irq))
+               return;
+
+       hdev->cpu = cpu;
+       per_cpu(cpu_hpet_dev, cpu) = hdev;
+       evt->name = hdev->name;
+       hpet_setup_irq(hdev);
+       evt->irq = hdev->irq;
+
+       evt->rating = 110;
+       evt->features = CLOCK_EVT_FEAT_ONESHOT;
+       if (hdev->flags & HPET_DEV_PERI_CAP)
+               evt->features |= CLOCK_EVT_FEAT_PERIODIC;
+
+       evt->set_mode = hpet_msi_set_mode;
+       evt->set_next_event = hpet_msi_next_event;
+       evt->shift = 32;
+
+       /*
+        * The period is a femto seconds value. We need to calculate the
+        * scaled math multiplication factor for nanosecond to hpet tick
+        * conversion.
+        */
+       hpet_freq = 1000000000000000ULL;
+       do_div(hpet_freq, hpet_period);
+       evt->mult = div_sc((unsigned long) hpet_freq,
+                                     NSEC_PER_SEC, evt->shift);
+       /* Calculate the max delta */
+       evt->max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, evt);
+       /* 5 usec minimum reprogramming delta. */
+       evt->min_delta_ns = 5000;
+
+       evt->cpumask = cpumask_of_cpu(hdev->cpu);
+       clockevents_register_device(evt);
+}
+
+#ifdef CONFIG_HPET
+/* Reserve at least one timer for userspace (/dev/hpet) */
+#define RESERVE_TIMERS 1
+#else
+#define RESERVE_TIMERS 0
+#endif
+
+static void hpet_msi_capability_lookup(unsigned int start_timer)
+{
+       unsigned int id;
+       unsigned int num_timers;
+       unsigned int num_timers_used = 0;
+       int i;
+
+       id = hpet_readl(HPET_ID);
+
+       num_timers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT);
+       num_timers++; /* Value read out starts from 0 */
+
+       hpet_devs = kzalloc(sizeof(struct hpet_dev) * num_timers, GFP_KERNEL);
+       if (!hpet_devs)
+               return;
+
+       hpet_num_timers = num_timers;
+
+       for (i = start_timer; i < num_timers - RESERVE_TIMERS; i++) {
+               struct hpet_dev *hdev = &hpet_devs[num_timers_used];
+               unsigned long cfg = hpet_readl(HPET_Tn_CFG(i));
+
+               /* Only consider HPET timer with MSI support */
+               if (!(cfg & HPET_TN_FSB_CAP))
+                       continue;
+
+               hdev->flags = 0;
+               if (cfg & HPET_TN_PERIODIC_CAP)
+                       hdev->flags |= HPET_DEV_PERI_CAP;
+               hdev->num = i;
+
+               sprintf(hdev->name, "hpet%d", i);
+               if (hpet_assign_irq(hdev))
+                       continue;
+
+               hdev->flags |= HPET_DEV_FSB_CAP;
+               hdev->flags |= HPET_DEV_VALID;
+               num_timers_used++;
+               if (num_timers_used == num_possible_cpus())
+                       break;
+       }
+
+       printk(KERN_INFO "HPET: %d timers in total, %d timers will be used for per-cpu timer\n",
+               num_timers, num_timers_used);
+}
+
+#ifdef CONFIG_HPET
+static void hpet_reserve_msi_timers(struct hpet_data *hd)
+{
+       int i;
+
+       if (!hpet_devs)
+               return;
+
+       for (i = 0; i < hpet_num_timers; i++) {
+               struct hpet_dev *hdev = &hpet_devs[i];
+
+               if (!(hdev->flags & HPET_DEV_VALID))
+                       continue;
+
+               hd->hd_irq[hdev->num] = hdev->irq;
+               hpet_reserve_timer(hd, hdev->num);
+       }
+}
+#endif
+
+static struct hpet_dev *hpet_get_unused_timer(void)
+{
+       int i;
+
+       if (!hpet_devs)
+               return NULL;
+
+       for (i = 0; i < hpet_num_timers; i++) {
+               struct hpet_dev *hdev = &hpet_devs[i];
+
+               if (!(hdev->flags & HPET_DEV_VALID))
+                       continue;
+               if (test_and_set_bit(HPET_DEV_USED_BIT,
+                       (unsigned long *)&hdev->flags))
+                       continue;
+               return hdev;
+       }
+       return NULL;
+}
+
+struct hpet_work_struct {
+       struct delayed_work work;
+       struct completion complete;
+};
+
+static void hpet_work(struct work_struct *w)
+{
+       struct hpet_dev *hdev;
+       int cpu = smp_processor_id();
+       struct hpet_work_struct *hpet_work;
+
+       hpet_work = container_of(w, struct hpet_work_struct, work.work);
+
+       hdev = hpet_get_unused_timer();
+       if (hdev)
+               init_one_hpet_msi_clockevent(hdev, cpu);
+
+       complete(&hpet_work->complete);
+}
+
+static int hpet_cpuhp_notify(struct notifier_block *n,
+               unsigned long action, void *hcpu)
+{
+       unsigned long cpu = (unsigned long)hcpu;
+       struct hpet_work_struct work;
+       struct hpet_dev *hdev = per_cpu(cpu_hpet_dev, cpu);
+
+       switch (action & 0xf) {
+       case CPU_ONLINE:
+               INIT_DELAYED_WORK(&work.work, hpet_work);
+               init_completion(&work.complete);
+               /* FIXME: add schedule_work_on() */
+               schedule_delayed_work_on(cpu, &work.work, 0);
+               wait_for_completion(&work.complete);
+               break;
+       case CPU_DEAD:
+               if (hdev) {
+                       free_irq(hdev->irq, hdev);
+                       hdev->flags &= ~HPET_DEV_USED;
+                       per_cpu(cpu_hpet_dev, cpu) = NULL;
+               }
+               break;
+       }
+       return NOTIFY_OK;
+}
+#else
+
+static int hpet_setup_msi_irq(unsigned int irq)
+{
+       return 0;
+}
+static void hpet_msi_capability_lookup(unsigned int start_timer)
+{
+       return;
+}
+
+#ifdef CONFIG_HPET
+static void hpet_reserve_msi_timers(struct hpet_data *hd)
+{
+       return;
+}
+#endif
+
+static int hpet_cpuhp_notify(struct notifier_block *n,
+               unsigned long action, void *hcpu)
+{
+       return NOTIFY_OK;
+}
+
+#endif
+
 /*
  * Clock source related code
  */
@@ -427,8 +803,10 @@ int __init hpet_enable(void)
 
        if (id & HPET_ID_LEGSUP) {
                hpet_legacy_clockevent_register();
+               hpet_msi_capability_lookup(2);
                return 1;
        }
+       hpet_msi_capability_lookup(0);
        return 0;
 
 out_nohpet:
@@ -445,6 +823,8 @@ out_nohpet:
  */
 static __init int hpet_late_init(void)
 {
+       int cpu;
+
        if (boot_hpet_disable)
                return -ENODEV;
 
@@ -460,6 +840,13 @@ static __init int hpet_late_init(void)
 
        hpet_reserve_platform_timers(hpet_readl(HPET_ID));
 
+       for_each_online_cpu(cpu) {
+               hpet_cpuhp_notify(NULL, CPU_ONLINE, (void *)(long)cpu);
+       }
+
+       /* This notifier should be called after workqueue is ready */
+       hotcpu_notifier(hpet_cpuhp_notify, -20);
+
        return 0;
 }
 fs_initcall(hpet_late_init);
similarity index 68%
rename from arch/x86/kernel/io_apic_64.c
rename to arch/x86/kernel/io_apic.c
index 02063ae..b764d74 100644 (file)
 #include <linux/sched.h>
 #include <linux/pci.h>
 #include <linux/mc146818rtc.h>
+#include <linux/compiler.h>
 #include <linux/acpi.h>
+#include <linux/module.h>
 #include <linux/sysdev.h>
 #include <linux/msi.h>
 #include <linux/htirq.h>
-#include <linux/dmar.h>
-#include <linux/jiffies.h>
+#include <linux/freezer.h>
+#include <linux/kthread.h>
+#include <linux/jiffies.h>     /* time_after() */
 #ifdef CONFIG_ACPI
 #include <acpi/acpi_bus.h>
 #endif
 #include <linux/bootmem.h>
 #include <linux/dmar.h>
+#include <linux/hpet.h>
 
 #include <asm/idle.h>
 #include <asm/io.h>
 #include <asm/proto.h>
 #include <asm/acpi.h>
 #include <asm/dma.h>
+#include <asm/timer.h>
 #include <asm/i8259.h>
 #include <asm/nmi.h>
 #include <asm/msidef.h>
 #include <asm/hypertransport.h>
+#include <asm/setup.h>
 #include <asm/irq_remapping.h>
+#include <asm/hpet.h>
+#include <asm/uv/uv_hub.h>
+#include <asm/uv/uv_irq.h>
 
 #include <mach_ipi.h>
 #include <mach_apic.h>
+#include <mach_apicdef.h>
 
 #define __apicdebuginit(type) static type __init
 
-struct irq_cfg {
-       cpumask_t domain;
-       cpumask_t old_domain;
-       unsigned move_cleanup_count;
-       u8 vector;
-       u8 move_in_progress : 1;
-};
-
-/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
-static struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = {
-       [0]  = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
-       [1]  = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
-       [2]  = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
-       [3]  = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR,  },
-       [4]  = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR,  },
-       [5]  = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR,  },
-       [6]  = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR,  },
-       [7]  = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR,  },
-       [8]  = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR,  },
-       [9]  = { .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR,  },
-       [10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
-       [11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
-       [12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
-       [13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
-       [14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
-       [15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
-};
-
-static int assign_irq_vector(int irq, cpumask_t mask);
-
-int first_system_vector = 0xfe;
-
-char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
-
-int sis_apic_bug; /* not actually supported, dummy for compile */
-
-static int no_timer_check;
-
-static int disable_timer_pin_1 __initdata;
-
-int timer_through_8259 __initdata;
-
-/* Where if anywhere is the i8259 connect in external int mode */
-static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
+/*
+ *      Is the SiS APIC rmw bug present ?
+ *      -1 = don't know, 0 = no, 1 = yes
+ */
+int sis_apic_bug = -1;
 
 static DEFINE_SPINLOCK(ioapic_lock);
 static DEFINE_SPINLOCK(vector_lock);
@@ -110,9 +81,6 @@ static DEFINE_SPINLOCK(vector_lock);
  */
 int nr_ioapic_registers[MAX_IO_APICS];
 
-/* I/O APIC RTE contents at the OS boot up */
-struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
-
 /* I/O APIC entries */
 struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
 int nr_ioapics;
@@ -123,11 +91,69 @@ struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
 /* # of MP IRQ source entries */
 int mp_irq_entries;
 
+#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
+int mp_bus_id_to_type[MAX_MP_BUSSES];
+#endif
+
 DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
 
+int skip_ioapic_setup;
+
+static int __init parse_noapic(char *str)
+{
+       /* disable IO-APIC */
+       disable_ioapic_setup();
+       return 0;
+}
+early_param("noapic", parse_noapic);
+
+struct irq_pin_list;
+struct irq_cfg {
+       unsigned int irq;
+       struct irq_pin_list *irq_2_pin;
+       cpumask_t domain;
+       cpumask_t old_domain;
+       unsigned move_cleanup_count;
+       u8 vector;
+       u8 move_in_progress : 1;
+};
+
+/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
+static struct irq_cfg irq_cfgx[NR_IRQS] = {
+       [0]  = { .irq =  0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
+       [1]  = { .irq =  1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
+       [2]  = { .irq =  2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
+       [3]  = { .irq =  3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR,  },
+       [4]  = { .irq =  4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR,  },
+       [5]  = { .irq =  5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR,  },
+       [6]  = { .irq =  6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR,  },
+       [7]  = { .irq =  7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR,  },
+       [8]  = { .irq =  8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR,  },
+       [9]  = { .irq =  9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR,  },
+       [10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
+       [11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
+       [12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
+       [13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
+       [14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
+       [15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
+};
+
+#define for_each_irq_cfg(irq, cfg)             \
+       for (irq = 0, cfg = irq_cfgx; irq < nr_irqs; irq++, cfg++)
+
+static struct irq_cfg *irq_cfg(unsigned int irq)
+{
+       return irq < nr_irqs ? irq_cfgx + irq : NULL;
+}
+
+static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
+{
+       return irq_cfg(irq);
+}
+
 /*
- * Rough estimation of how many shared IRQs there are, can
- * be changed anytime.
+ * Rough estimation of how many shared IRQs there are, can be changed
+ * anytime.
  */
 #define MAX_PLUS_SHARED_IRQS NR_IRQS
 #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
@@ -139,9 +165,36 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
  * between pins and IRQs.
  */
 
-static struct irq_pin_list {
-       short apic, pin, next;
-} irq_2_pin[PIN_MAP_SIZE];
+struct irq_pin_list {
+       int apic, pin;
+       struct irq_pin_list *next;
+};
+
+static struct irq_pin_list irq_2_pin_head[PIN_MAP_SIZE];
+static struct irq_pin_list *irq_2_pin_ptr;
+
+static void __init irq_2_pin_init(void)
+{
+       struct irq_pin_list *pin = irq_2_pin_head;
+       int i;
+
+       for (i = 1; i < PIN_MAP_SIZE; i++)
+               pin[i-1].next = &pin[i];
+
+       irq_2_pin_ptr = &pin[0];
+}
+
+static struct irq_pin_list *get_one_free_irq_2_pin(void)
+{
+       struct irq_pin_list *pin = irq_2_pin_ptr;
+
+       if (!pin)
+               panic("can not get more irq_2_pin\n");
+
+       irq_2_pin_ptr = pin->next;
+       pin->next = NULL;
+       return pin;
+}
 
 struct io_apic {
        unsigned int index;
@@ -172,10 +225,15 @@ static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned i
 /*
  * Re-write a value: to be used for read-modify-write
  * cycles where the read already set up the index register.
+ *
+ * Older SiS APIC requires we rewrite the index register
  */
-static inline void io_apic_modify(unsigned int apic, unsigned int value)
+static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
 {
        struct io_apic __iomem *io_apic = io_apic_base(apic);
+
+       if (sis_apic_bug)
+               writel(reg, &io_apic->index);
        writel(value, &io_apic->data);
 }
 
@@ -183,16 +241,17 @@ static bool io_apic_level_ack_pending(unsigned int irq)
 {
        struct irq_pin_list *entry;
        unsigned long flags;
+       struct irq_cfg *cfg = irq_cfg(irq);
 
        spin_lock_irqsave(&ioapic_lock, flags);
-       entry = irq_2_pin + irq;
+       entry = cfg->irq_2_pin;
        for (;;) {
                unsigned int reg;
                int pin;
 
-               pin = entry->pin;
-               if (pin == -1)
+               if (!entry)
                        break;
+               pin = entry->pin;
                reg = io_apic_read(entry->apic, 0x10 + pin*2);
                /* Is the remote IRR bit set? */
                if (reg & IO_APIC_REDIR_REMOTE_IRR) {
@@ -201,45 +260,13 @@ static bool io_apic_level_ack_pending(unsigned int irq)
                }
                if (!entry->next)
                        break;
-               entry = irq_2_pin + entry->next;
+               entry = entry->next;
        }
        spin_unlock_irqrestore(&ioapic_lock, flags);
 
        return false;
 }
 
-/*
- * Synchronize the IO-APIC and the CPU by doing
- * a dummy read from the IO-APIC
- */
-static inline void io_apic_sync(unsigned int apic)
-{
-       struct io_apic __iomem *io_apic = io_apic_base(apic);
-       readl(&io_apic->data);
-}
-
-#define __DO_ACTION(R, ACTION, FINAL)                                  \
-                                                                       \
-{                                                                      \
-       int pin;                                                        \
-       struct irq_pin_list *entry = irq_2_pin + irq;                   \
-                                                                       \
-       BUG_ON(irq >= NR_IRQS);                                         \
-       for (;;) {                                                      \
-               unsigned int reg;                                       \
-               pin = entry->pin;                                       \
-               if (pin == -1)                                          \
-                       break;                                          \
-               reg = io_apic_read(entry->apic, 0x10 + R + pin*2);      \
-               reg ACTION;                                             \
-               io_apic_modify(entry->apic, reg);                       \
-               FINAL;                                                  \
-               if (!entry->next)                                       \
-                       break;                                          \
-               entry = irq_2_pin + entry->next;                        \
-       }                                                               \
-}
-
 union entry_union {
        struct { u32 w1, w2; };
        struct IO_APIC_route_entry entry;
@@ -299,59 +326,71 @@ static void ioapic_mask_entry(int apic, int pin)
 static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
 {
        int apic, pin;
-       struct irq_pin_list *entry = irq_2_pin + irq;
+       struct irq_cfg *cfg;
+       struct irq_pin_list *entry;
 
-       BUG_ON(irq >= NR_IRQS);
+       cfg = irq_cfg(irq);
+       entry = cfg->irq_2_pin;
        for (;;) {
                unsigned int reg;
+
+               if (!entry)
+                       break;
+
                apic = entry->apic;
                pin = entry->pin;
-               if (pin == -1)
-                       break;
+#ifdef CONFIG_INTR_REMAP
                /*
                 * With interrupt-remapping, destination information comes
                 * from interrupt-remapping table entry.
                 */
                if (!irq_remapped(irq))
                        io_apic_write(apic, 0x11 + pin*2, dest);
+#else
+               io_apic_write(apic, 0x11 + pin*2, dest);
+#endif
                reg = io_apic_read(apic, 0x10 + pin*2);
                reg &= ~IO_APIC_REDIR_VECTOR_MASK;
                reg |= vector;
-               io_apic_modify(apic, reg);
+               io_apic_modify(apic, 0x10 + pin*2, reg);
                if (!entry->next)
                        break;
-               entry = irq_2_pin + entry->next;
+               entry = entry->next;
        }
 }
 
+static int assign_irq_vector(int irq, cpumask_t mask);
+
 static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 {
-       struct irq_cfg *cfg = irq_cfg + irq;
+       struct irq_cfg *cfg;
        unsigned long flags;
        unsigned int dest;
        cpumask_t tmp;
+       struct irq_desc *desc;
 
        cpus_and(tmp, mask, cpu_online_map);
        if (cpus_empty(tmp))
                return;
 
+       cfg = irq_cfg(irq);
        if (assign_irq_vector(irq, mask))
                return;
 
        cpus_and(tmp, cfg->domain, mask);
        dest = cpu_mask_to_apicid(tmp);
-
        /*
         * Only the high 8 bits are valid.
         */
        dest = SET_APIC_LOGICAL_ID(dest);
 
+       desc = irq_to_desc(irq);
        spin_lock_irqsave(&ioapic_lock, flags);
        __target_IO_APIC_irq(irq, dest, cfg->vector);
-       irq_desc[irq].affinity = mask;
+       desc->affinity = mask;
        spin_unlock_irqrestore(&ioapic_lock, flags);
 }
-#endif
+#endif /* CONFIG_SMP */
 
 /*
  * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
@@ -360,19 +399,30 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
  */
 static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 {
-       static int first_free_entry = NR_IRQS;
-       struct irq_pin_list *entry = irq_2_pin + irq;
+       struct irq_cfg *cfg;
+       struct irq_pin_list *entry;
+
+       /* first time to refer irq_cfg, so with new */
+       cfg = irq_cfg_alloc(irq);
+       entry = cfg->irq_2_pin;
+       if (!entry) {
+               entry = get_one_free_irq_2_pin();
+               cfg->irq_2_pin = entry;
+               entry->apic = apic;
+               entry->pin = pin;
+               return;
+       }
 
-       BUG_ON(irq >= NR_IRQS);
-       while (entry->next)
-               entry = irq_2_pin + entry->next;
+       while (entry->next) {
+               /* not again, please */
+               if (entry->apic == apic && entry->pin == pin)
+                       return;
 
-       if (entry->pin != -1) {
-               entry->next = first_free_entry;
-               entry = irq_2_pin + entry->next;
-               if (++first_free_entry >= PIN_MAP_SIZE)
-                       panic("io_apic.c: ran out of irq_2_pin entries!");
+               entry = entry->next;
        }
+
+       entry->next = get_one_free_irq_2_pin();
+       entry = entry->next;
        entry->apic = apic;
        entry->pin = pin;
 }
@@ -384,30 +434,86 @@ static void __init replace_pin_at_irq(unsigned int irq,
                                      int oldapic, int oldpin,
                                      int newapic, int newpin)
 {
-       struct irq_pin_list *entry = irq_2_pin + irq;
+       struct irq_cfg *cfg = irq_cfg(irq);
+       struct irq_pin_list *entry = cfg->irq_2_pin;
+       int replaced = 0;
 
-       while (1) {
+       while (entry) {
                if (entry->apic == oldapic && entry->pin == oldpin) {
                        entry->apic = newapic;
                        entry->pin = newpin;
-               }
-               if (!entry->next)
+                       replaced = 1;
+                       /* every one is different, right? */
                        break;
-               entry = irq_2_pin + entry->next;
+               }
+               entry = entry->next;
+       }
+
+       /* why? call replace before add? */
+       if (!replaced)
+               add_pin_to_irq(irq, newapic, newpin);
+}
+
+static inline void io_apic_modify_irq(unsigned int irq,
+                               int mask_and, int mask_or,
+                               void (*final)(struct irq_pin_list *entry))
+{
+       int pin;
+       struct irq_cfg *cfg;
+       struct irq_pin_list *entry;
+
+       cfg = irq_cfg(irq);
+       for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) {
+               unsigned int reg;
+               pin = entry->pin;
+               reg = io_apic_read(entry->apic, 0x10 + pin * 2);
+               reg &= mask_and;
+               reg |= mask_or;
+               io_apic_modify(entry->apic, 0x10 + pin * 2, reg);
+               if (final)
+                       final(entry);
        }
 }
 
+static void __unmask_IO_APIC_irq(unsigned int irq)
+{
+       io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, 0, NULL);
+}
 
-#define DO_ACTION(name,R,ACTION, FINAL)                                        \
-                                                                       \
-       static void name##_IO_APIC_irq (unsigned int irq)               \
-       __DO_ACTION(R, ACTION, FINAL)
+#ifdef CONFIG_X86_64
+void io_apic_sync(struct irq_pin_list *entry)
+{
+       /*
+        * Synchronize the IO-APIC and the CPU by doing
+        * a dummy read from the IO-APIC
+        */
+       struct io_apic __iomem *io_apic;
+       io_apic = io_apic_base(entry->apic);
+       readl(&io_apic->data);
+}
 
-/* mask = 1 */
-DO_ACTION(__mask,      0, |= IO_APIC_REDIR_MASKED, io_apic_sync(entry->apic))
+static void __mask_IO_APIC_irq(unsigned int irq)
+{
+       io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
+}
+#else /* CONFIG_X86_32 */
+static void __mask_IO_APIC_irq(unsigned int irq)
+{
+       io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, NULL);
+}
 
-/* mask = 0 */
-DO_ACTION(__unmask,    0, &= ~IO_APIC_REDIR_MASKED, )
+static void __mask_and_edge_IO_APIC_irq(unsigned int irq)
+{
+       io_apic_modify_irq(irq, ~IO_APIC_REDIR_LEVEL_TRIGGER,
+                       IO_APIC_REDIR_MASKED, NULL);
+}
+
+static void __unmask_and_level_IO_APIC_irq(unsigned int irq)
+{
+       io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED,
+                       IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
+}
+#endif /* CONFIG_X86_32 */
 
 static void mask_IO_APIC_irq (unsigned int irq)
 {
@@ -450,6 +556,68 @@ static void clear_IO_APIC (void)
                        clear_IO_APIC_pin(apic, pin);
 }
 
+#if !defined(CONFIG_SMP) && defined(CONFIG_X86_32)
+void send_IPI_self(int vector)
+{
+       unsigned int cfg;
+
+       /*
+        * Wait for idle.
+        */
+       apic_wait_icr_idle();
+       cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL;
+       /*
+        * Send the IPI. The write to APIC_ICR fires this off.
+        */
+       apic_write(APIC_ICR, cfg);
+}
+#endif /* !CONFIG_SMP && CONFIG_X86_32*/
+
+#ifdef CONFIG_X86_32
+/*
+ * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
+ * specific CPU-side IRQs.
+ */
+
+#define MAX_PIRQS 8
+static int pirq_entries [MAX_PIRQS];
+static int pirqs_enabled;
+
+static int __init ioapic_pirq_setup(char *str)
+{
+       int i, max;
+       int ints[MAX_PIRQS+1];
+
+       get_options(str, ARRAY_SIZE(ints), ints);
+
+       for (i = 0; i < MAX_PIRQS; i++)
+               pirq_entries[i] = -1;
+
+       pirqs_enabled = 1;
+       apic_printk(APIC_VERBOSE, KERN_INFO
+                       "PIRQ redirection, working around broken MP-BIOS.\n");
+       max = MAX_PIRQS;
+       if (ints[0] < MAX_PIRQS)
+               max = ints[0];
+
+       for (i = 0; i < max; i++) {
+               apic_printk(APIC_VERBOSE, KERN_DEBUG
+                               "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
+               /*
+                * PIRQs are mapped upside down, usually.
+                */
+               pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
+       }
+       return 1;
+}
+
+__setup("pirq=", ioapic_pirq_setup);
+#endif /* CONFIG_X86_32 */
+
+#ifdef CONFIG_INTR_REMAP
+/* I/O APIC RTE contents at the OS boot up */
+static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
+
 /*
  * Saves and masks all the unmasked IO-APIC RTE's
  */
@@ -474,7 +642,7 @@ int save_mask_IO_APIC_setup(void)
                        kzalloc(sizeof(struct IO_APIC_route_entry) *
                                nr_ioapic_registers[apic], GFP_KERNEL);
                if (!early_ioapic_entries[apic])
-                       return -ENOMEM;
+                       goto nomem;
        }
 
        for (apic = 0; apic < nr_ioapics; apic++)
@@ -488,17 +656,31 @@ int save_mask_IO_APIC_setup(void)
                                ioapic_write_entry(apic, pin, entry);
                        }
                }
+
        return 0;
+
+nomem:
+       while (apic >= 0)
+               kfree(early_ioapic_entries[apic--]);
+       memset(early_ioapic_entries, 0,
+               ARRAY_SIZE(early_ioapic_entries));
+
+       return -ENOMEM;
 }
 
 void restore_IO_APIC_setup(void)
 {
        int apic, pin;
 
-       for (apic = 0; apic < nr_ioapics; apic++)
+       for (apic = 0; apic < nr_ioapics; apic++) {
+               if (!early_ioapic_entries[apic])
+                       break;
                for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
                        ioapic_write_entry(apic, pin,
                                           early_ioapic_entries[apic][pin]);
+               kfree(early_ioapic_entries[apic]);
+               early_ioapic_entries[apic] = NULL;
+       }
 }
 
 void reinit_intr_remapped_IO_APIC(int intr_remapping)
@@ -512,25 +694,7 @@ void reinit_intr_remapped_IO_APIC(int intr_remapping)
         */
        restore_IO_APIC_setup();
 }
-
-int skip_ioapic_setup;
-int ioapic_force;
-
-static int __init parse_noapic(char *str)
-{
-       disable_ioapic_setup();
-       return 0;
-}
-early_param("noapic", parse_noapic);
-
-/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
-static int __init disable_timer_pin_setup(char *arg)
-{
-       disable_timer_pin_1 = 1;
-       return 1;
-}
-__setup("disable_timer_pin_1", disable_timer_pin_setup);
-
+#endif
 
 /*
  * Find the IRQ entry number of a certain pin.
@@ -634,22 +798,54 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
                                best_guess = irq;
                }
        }
-       BUG_ON(best_guess >= NR_IRQS);
        return best_guess;
 }
 
+EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
+
+#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
+/*
+ * EISA Edge/Level control register, ELCR
+ */
+static int EISA_ELCR(unsigned int irq)
+{
+       if (irq < 16) {
+               unsigned int port = 0x4d0 + (irq >> 3);
+               return (inb(port) >> (irq & 7)) & 1;
+       }
+       apic_printk(APIC_VERBOSE, KERN_INFO
+                       "Broken MPtable reports ISA irq %d\n", irq);
+       return 0;
+}
+
+#endif
+
 /* ISA interrupts are always polarity zero edge triggered,
  * when listed as conforming in the MP table. */
 
 #define default_ISA_trigger(idx)       (0)
 #define default_ISA_polarity(idx)      (0)
 
+/* EISA interrupts are always polarity zero and can be edge or level
+ * trigger depending on the ELCR value.  If an interrupt is listed as
+ * EISA conforming in the MP table, that means its trigger type must
+ * be read in from the ELCR */
+
+#define default_EISA_trigger(idx)      (EISA_ELCR(mp_irqs[idx].mp_srcbusirq))
+#define default_EISA_polarity(idx)     default_ISA_polarity(idx)
+
 /* PCI interrupts are always polarity one level triggered,
  * when listed as conforming in the MP table. */
 
 #define default_PCI_trigger(idx)       (1)
 #define default_PCI_polarity(idx)      (1)
 
+/* MCA interrupts are always polarity zero level triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_MCA_trigger(idx)       (1)
+#define default_MCA_polarity(idx)      default_ISA_polarity(idx)
+
 static int MPBIOS_polarity(int idx)
 {
        int bus = mp_irqs[idx].mp_srcbus;
@@ -707,6 +903,36 @@ static int MPBIOS_trigger(int idx)
                                trigger = default_ISA_trigger(idx);
                        else
                                trigger = default_PCI_trigger(idx);
+#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
+                       switch (mp_bus_id_to_type[bus]) {
+                               case MP_BUS_ISA: /* ISA pin */
+                               {
+                                       /* set before the switch */
+                                       break;
+                               }
+                               case MP_BUS_EISA: /* EISA pin */
+                               {
+                                       trigger = default_EISA_trigger(idx);
+                                       break;
+                               }
+                               case MP_BUS_PCI: /* PCI pin */
+                               {
+                                       /* set before the switch */
+                                       break;
+                               }
+                               case MP_BUS_MCA: /* MCA pin */
+                               {
+                                       trigger = default_MCA_trigger(idx);
+                                       break;
+                               }
+                               default:
+                               {
+                                       printk(KERN_WARNING "broken BIOS!!\n");
+                                       trigger = 1;
+                                       break;
+                               }
+                       }
+#endif
                        break;
                case 1: /* edge */
                {
@@ -744,6 +970,7 @@ static inline int irq_trigger(int idx)
        return MPBIOS_trigger(idx);
 }
 
+int (*ioapic_renumber_irq)(int ioapic, int irq);
 static int pin_2_irq(int idx, int apic, int pin)
 {
        int irq, i;
@@ -765,8 +992,32 @@ static int pin_2_irq(int idx, int apic, int pin)
                while (i < apic)
                        irq += nr_ioapic_registers[i++];
                irq += pin;
+               /*
+                 * For MPS mode, so far only needed by ES7000 platform
+                 */
+               if (ioapic_renumber_irq)
+                       irq = ioapic_renumber_irq(apic, irq);
        }
-       BUG_ON(irq >= NR_IRQS);
+
+#ifdef CONFIG_X86_32
+       /*
+        * PCI IRQ command line redirection. Yes, limits are hardcoded.
+        */
+       if ((pin >= 16) && (pin <= 23)) {
+               if (pirq_entries[pin-16] != -1) {
+                       if (!pirq_entries[pin-16]) {
+                               apic_printk(APIC_VERBOSE, KERN_DEBUG
+                                               "disabling PIRQ%d\n", pin-16);
+                       } else {
+                               irq = pirq_entries[pin-16];
+                               apic_printk(APIC_VERBOSE, KERN_DEBUG
+                                               "using PIRQ%d -> IRQ %d\n",
+                                               pin-16, irq);
+                       }
+               }
+       }
+#endif
+
        return irq;
 }
 
@@ -801,8 +1052,7 @@ static int __assign_irq_vector(int irq, cpumask_t mask)
        int cpu;
        struct irq_cfg *cfg;
 
-       BUG_ON((unsigned)irq >= NR_IRQS);
-       cfg = &irq_cfg[irq];
+       cfg = irq_cfg(irq);
 
        /* Only try and allocate irqs on cpus that are present */
        cpus_and(mask, mask, cpu_online_map);
@@ -837,8 +1087,13 @@ next:
                }
                if (unlikely(current_vector == vector))
                        continue;
+#ifdef CONFIG_X86_64
                if (vector == IA32_SYSCALL_VECTOR)
                        goto next;
+#else
+               if (vector == SYSCALL_VECTOR)
+                       goto next;
+#endif
                for_each_cpu_mask_nr(new_cpu, new_mask)
                        if (per_cpu(vector_irq, new_cpu)[vector] != -1)
                                goto next;
@@ -875,8 +1130,7 @@ static void __clear_irq_vector(int irq)
        cpumask_t mask;
        int cpu, vector;
 
-       BUG_ON((unsigned)irq >= NR_IRQS);
-       cfg = &irq_cfg[irq];
+       cfg = irq_cfg(irq);
        BUG_ON(!cfg->vector);
 
        vector = cfg->vector;
@@ -893,12 +1147,13 @@ void __setup_vector_irq(int cpu)
        /* Initialize vector_irq on a new cpu */
        /* This function must be called with vector_lock held */
        int irq, vector;
+       struct irq_cfg *cfg;
 
        /* Mark the inuse vectors */
-       for (irq = 0; irq < NR_IRQS; ++irq) {
-               if (!cpu_isset(cpu, irq_cfg[irq].domain))
+       for_each_irq_cfg(irq, cfg) {
+               if (!cpu_isset(cpu, cfg->domain))
                        continue;
-               vector = irq_cfg[irq].vector;
+               vector = cfg->vector;
                per_cpu(vector_irq, cpu)[vector] = irq;
        }
        /* Mark the free vectors */
@@ -906,7 +1161,9 @@ void __setup_vector_irq(int cpu)
                irq = per_cpu(vector_irq, cpu)[vector];
                if (irq < 0)
                        continue;
-               if (!cpu_isset(cpu, irq_cfg[irq].domain))
+
+               cfg = irq_cfg(irq);
+               if (!cpu_isset(cpu, cfg->domain))
                        per_cpu(vector_irq, cpu)[vector] = -1;
        }
 }
@@ -916,16 +1173,49 @@ static struct irq_chip ioapic_chip;
 static struct irq_chip ir_ioapic_chip;
 #endif
 
+#define IOAPIC_AUTO     -1
+#define IOAPIC_EDGE     0
+#define IOAPIC_LEVEL    1
+
+#ifdef CONFIG_X86_32
+static inline int IO_APIC_irq_trigger(int irq)
+{
+       int apic, idx, pin;
+
+       for (apic = 0; apic < nr_ioapics; apic++) {
+               for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+                       idx = find_irq_entry(apic, pin, mp_INT);
+                       if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
+                               return irq_trigger(idx);
+               }
+       }
+       /*
+         * nonexistent IRQs are edge default
+         */
+       return 0;
+}
+#else
+static inline int IO_APIC_irq_trigger(int irq)
+{
+       return 1;
+}
+#endif
+
 static void ioapic_register_intr(int irq, unsigned long trigger)
 {
-       if (trigger)
-               irq_desc[irq].status |= IRQ_LEVEL;
+       struct irq_desc *desc;
+
+       desc = irq_to_desc(irq);
+
+       if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
+           trigger == IOAPIC_LEVEL)
+               desc->status |= IRQ_LEVEL;
        else
-               irq_desc[irq].status &= ~IRQ_LEVEL;
+               desc->status &= ~IRQ_LEVEL;
 
 #ifdef CONFIG_INTR_REMAP
        if (irq_remapped(irq)) {
-               irq_desc[irq].status |= IRQ_MOVE_PCNTXT;
+               desc->status |= IRQ_MOVE_PCNTXT;
                if (trigger)
                        set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
                                                      handle_fasteoi_irq,
@@ -936,7 +1226,8 @@ static void ioapic_register_intr(int irq, unsigned long trigger)
                return;
        }
 #endif
-       if (trigger)
+       if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
+           trigger == IOAPIC_LEVEL)
                set_irq_chip_and_handler_name(irq, &ioapic_chip,
                                              handle_fasteoi_irq,
                                              "fasteoi");
@@ -1009,13 +1300,15 @@ static int setup_ioapic_entry(int apic, int irq,
 static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
                              int trigger, int polarity)
 {
-       struct irq_cfg *cfg = irq_cfg + irq;
+       struct irq_cfg *cfg;
        struct IO_APIC_route_entry entry;
        cpumask_t mask;
 
        if (!IO_APIC_IRQ(irq))
                return;
 
+       cfg = irq_cfg(irq);
+
        mask = TARGET_CPUS;
        if (assign_irq_vector(irq, mask))
                return;
@@ -1047,37 +1340,49 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
 
 static void __init setup_IO_APIC_irqs(void)
 {
-       int apic, pin, idx, irq, first_notcon = 1;
+       int apic, pin, idx, irq;
+       int notcon = 0;
 
        apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
 
        for (apic = 0; apic < nr_ioapics; apic++) {
-       for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
-
-               idx = find_irq_entry(apic,pin,mp_INT);
-               if (idx == -1) {
-                       if (first_notcon) {
-                               apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin);
-                               first_notcon = 0;
-                       } else
-                               apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin);
-                       continue;
-               }
-               if (!first_notcon) {
-                       apic_printk(APIC_VERBOSE, " not connected.\n");
-                       first_notcon = 1;
-               }
+               for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
 
-               irq = pin_2_irq(idx, apic, pin);
-               add_pin_to_irq(irq, apic, pin);
+                       idx = find_irq_entry(apic, pin, mp_INT);
+                       if (idx == -1) {
+                               if (!notcon) {
+                                       notcon = 1;
+                                       apic_printk(APIC_VERBOSE,
+                                               KERN_DEBUG " %d-%d",
+                                               mp_ioapics[apic].mp_apicid,
+                                               pin);
+                               } else
+                                       apic_printk(APIC_VERBOSE, " %d-%d",
+                                               mp_ioapics[apic].mp_apicid,
+                                               pin);
+                               continue;
+                       }
+                       if (notcon) {
+                               apic_printk(APIC_VERBOSE,
+                                       " (apicid-pin) not connected\n");
+                               notcon = 0;
+                       }
 
-               setup_IO_APIC_irq(apic, pin, irq,
-                                 irq_trigger(idx), irq_polarity(idx));
-       }
+                       irq = pin_2_irq(idx, apic, pin);
+#ifdef CONFIG_X86_32
+                       if (multi_timer_check(apic, irq))
+                               continue;
+#endif
+                       add_pin_to_irq(irq, apic, pin);
+
+                       setup_IO_APIC_irq(apic, pin, irq,
+                                       irq_trigger(idx), irq_polarity(idx));
+               }
        }
 
-       if (!first_notcon)
-               apic_printk(APIC_VERBOSE, " not connected.\n");
+       if (notcon)
+               apic_printk(APIC_VERBOSE,
+                       " (apicid-pin) not connected\n");
 }
 
 /*
@@ -1088,8 +1393,10 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
 {
        struct IO_APIC_route_entry entry;
 
+#ifdef CONFIG_INTR_REMAP
        if (intr_remapping_enabled)
                return;
+#endif
 
        memset(&entry, 0, sizeof(entry));
 
@@ -1124,7 +1431,10 @@ __apicdebuginit(void) print_IO_APIC(void)
        union IO_APIC_reg_00 reg_00;
        union IO_APIC_reg_01 reg_01;
        union IO_APIC_reg_02 reg_02;
+       union IO_APIC_reg_03 reg_03;
        unsigned long flags;
+       struct irq_cfg *cfg;
+       unsigned int irq;
 
        if (apic_verbosity == APIC_QUIET)
                return;
@@ -1147,12 +1457,16 @@ __apicdebuginit(void) print_IO_APIC(void)
        reg_01.raw = io_apic_read(apic, 1);
        if (reg_01.bits.version >= 0x10)
                reg_02.raw = io_apic_read(apic, 2);
+       if (reg_01.bits.version >= 0x20)
+               reg_03.raw = io_apic_read(apic, 3);
        spin_unlock_irqrestore(&ioapic_lock, flags);
 
        printk("\n");
        printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
        printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
        printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
+       printk(KERN_DEBUG ".......    : Delivery Type: %X\n", reg_00.bits.delivery_type);
+       printk(KERN_DEBUG ".......    : LTS          : %X\n", reg_00.bits.LTS);
 
        printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
        printk(KERN_DEBUG ".......     : max redirection entries: %04X\n", reg_01.bits.entries);
@@ -1160,11 +1474,27 @@ __apicdebuginit(void) print_IO_APIC(void)
        printk(KERN_DEBUG ".......     : PRQ implemented: %X\n", reg_01.bits.PRQ);
        printk(KERN_DEBUG ".......     : IO APIC version: %04X\n", reg_01.bits.version);
 
-       if (reg_01.bits.version >= 0x10) {
+       /*
+        * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
+        * but the value of reg_02 is read as the previous read register
+        * value, so ignore it if reg_02 == reg_01.
+        */
+       if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
                printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
                printk(KERN_DEBUG ".......     : arbitration: %02X\n", reg_02.bits.arbitration);
        }
 
+       /*
+        * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02
+        * or reg_03, but the value of reg_0[23] is read as the previous read
+        * register value, so ignore it if reg_03 == reg_0[12].
+        */
+       if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
+           reg_03.raw != reg_01.raw) {
+               printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
+               printk(KERN_DEBUG ".......     : Boot DT    : %X\n", reg_03.bits.boot_DT);
+       }
+
        printk(KERN_DEBUG ".... IRQ redirection table:\n");
 
        printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol"
@@ -1193,16 +1523,16 @@ __apicdebuginit(void) print_IO_APIC(void)
        }
        }
        printk(KERN_DEBUG "IRQ to pin mappings:\n");
-       for (i = 0; i < NR_IRQS; i++) {
-               struct irq_pin_list *entry = irq_2_pin + i;
-               if (entry->pin < 0)
+       for_each_irq_cfg(irq, cfg) {
+               struct irq_pin_list *entry = cfg->irq_2_pin;
+               if (!entry)
                        continue;
-               printk(KERN_DEBUG "IRQ%d ", i);
+               printk(KERN_DEBUG "IRQ%d ", irq);
                for (;;) {
                        printk("-> %d:%d", entry->apic, entry->pin);
                        if (!entry->next)
                                break;
-                       entry = irq_2_pin + entry->next;
+                       entry = entry->next;
                }
                printk("\n");
        }
@@ -1236,7 +1566,7 @@ __apicdebuginit(void) print_APIC_bitfield(int base)
 __apicdebuginit(void) print_local_APIC(void *dummy)
 {
        unsigned int v, ver, maxlvt;
-       unsigned long icr;
+       u64 icr;
 
        if (apic_verbosity == APIC_QUIET)
                return;
@@ -1253,20 +1583,31 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
        v = apic_read(APIC_TASKPRI);
        printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
 
-       v = apic_read(APIC_ARBPRI);
-       printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
-               v & APIC_ARBPRI_MASK);
-       v = apic_read(APIC_PROCPRI);
-       printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
+       if (APIC_INTEGRATED(ver)) {                     /* !82489DX */
+               if (!APIC_XAPIC(ver)) {
+                       v = apic_read(APIC_ARBPRI);
+                       printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
+                              v & APIC_ARBPRI_MASK);
+               }
+               v = apic_read(APIC_PROCPRI);
+               printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
+       }
+
+       /*
+        * Remote read supported only in the 82489DX and local APIC for
+        * Pentium processors.
+        */
+       if (!APIC_INTEGRATED(ver) || maxlvt == 3) {
+               v = apic_read(APIC_RRR);
+               printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
+       }
 
-       v = apic_read(APIC_EOI);
-       printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
-       v = apic_read(APIC_RRR);
-       printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
        v = apic_read(APIC_LDR);
        printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
-       v = apic_read(APIC_DFR);
-       printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
+       if (!x2apic_enabled()) {
+               v = apic_read(APIC_DFR);
+               printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
+       }
        v = apic_read(APIC_SPIV);
        printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
 
@@ -1277,8 +1618,13 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
        printk(KERN_DEBUG "... APIC IRR field:\n");
        print_APIC_bitfield(APIC_IRR);
 
-       v = apic_read(APIC_ESR);
-       printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
+       if (APIC_INTEGRATED(ver)) {             /* !82489DX */
+               if (maxlvt > 3)         /* Due to the Pentium erratum 3AP. */
+                       apic_write(APIC_ESR, 0);
+
+               v = apic_read(APIC_ESR);
+               printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
+       }
 
        icr = apic_icr_read();
        printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr);
@@ -1312,7 +1658,12 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
 
 __apicdebuginit(void) print_all_local_APICs(void)
 {
-       on_each_cpu(print_local_APIC, NULL, 1);
+       int cpu;
+
+       preempt_disable();
+       for_each_online_cpu(cpu)
+               smp_call_function_single(cpu, print_local_APIC, NULL, 1);
+       preempt_enable();
 }
 
 __apicdebuginit(void) print_PIC(void)
@@ -1359,17 +1710,22 @@ __apicdebuginit(int) print_all_ICs(void)
 fs_initcall(print_all_ICs);
 
 
+/* Where if anywhere is the i8259 connect in external int mode */
+static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
+
 void __init enable_IO_APIC(void)
 {
        union IO_APIC_reg_01 reg_01;
        int i8259_apic, i8259_pin;
-       int i, apic;
+       int apic;
        unsigned long flags;
 
-       for (i = 0; i < PIN_MAP_SIZE; i++) {
-               irq_2_pin[i].pin = -1;
-               irq_2_pin[i].next = 0;
-       }
+#ifdef CONFIG_X86_32
+       int i;
+       if (!pirqs_enabled)
+               for (i = 0; i < MAX_PIRQS; i++)
+                       pirq_entries[i] = -1;
+#endif
 
        /*
         * The number of IO-APIC IRQ registers (== #pins):
@@ -1399,6 +1755,10 @@ void __init enable_IO_APIC(void)
        }
  found_i8259:
        /* Look to see what if the MP table has reported the ExtINT */
+       /* If we could not find the appropriate pin by looking at the ioapic
+        * the i8259 probably is not connected the ioapic but give the
+        * mptable a chance anyway.
+        */
        i8259_pin  = find_isa_irq_pin(0, mp_ExtINT);
        i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
        /* Trust the MP table if nothing is setup in the hardware */
@@ -1458,6 +1818,133 @@ void disable_IO_APIC(void)
        disconnect_bsp_APIC(ioapic_i8259.pin != -1);
 }
 
+#ifdef CONFIG_X86_32
+/*
+ * function to set the IO-APIC physical IDs based on the
+ * values stored in the MPC table.
+ *
+ * by Matt Domsch <Matt_Domsch@dell.com>  Tue Dec 21 12:25:05 CST 1999
+ */
+
+static void __init setup_ioapic_ids_from_mpc(void)
+{
+       union IO_APIC_reg_00 reg_00;
+       physid_mask_t phys_id_present_map;
+       int apic;
+       int i;
+       unsigned char old_id;
+       unsigned long flags;
+
+       if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids())
+               return;
+
+       /*
+        * Don't check I/O APIC IDs for xAPIC systems.  They have
+        * no meaning without the serial APIC bus.
+        */
+       if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+               || APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+               return;
+       /*
+        * This is broken; anything with a real cpu count has to
+        * circumvent this idiocy regardless.
+        */
+       phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map);
+
+       /*
+        * Set the IOAPIC ID to the value stored in the MPC table.
+        */
+       for (apic = 0; apic < nr_ioapics; apic++) {
+
+               /* Read the register 0 value */
+               spin_lock_irqsave(&ioapic_lock, flags);
+               reg_00.raw = io_apic_read(apic, 0);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+
+               old_id = mp_ioapics[apic].mp_apicid;
+
+               if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) {
+                       printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
+                               apic, mp_ioapics[apic].mp_apicid);
+                       printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
+                               reg_00.bits.ID);
+                       mp_ioapics[apic].mp_apicid = reg_00.bits.ID;
+               }
+
+               /*
+                * Sanity check, is the ID really free? Every APIC in a
+                * system must have a unique ID or we get lots of nice
+                * 'stuck on smp_invalidate_needed IPI wait' messages.
+                */
+               if (check_apicid_used(phys_id_present_map,
+                                       mp_ioapics[apic].mp_apicid)) {
+                       printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
+                               apic, mp_ioapics[apic].mp_apicid);
+                       for (i = 0; i < get_physical_broadcast(); i++)
+                               if (!physid_isset(i, phys_id_present_map))
+                                       break;
+                       if (i >= get_physical_broadcast())
+                               panic("Max APIC ID exceeded!\n");
+                       printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
+                               i);
+                       physid_set(i, phys_id_present_map);
+                       mp_ioapics[apic].mp_apicid = i;
+               } else {
+                       physid_mask_t tmp;
+                       tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid);
+                       apic_printk(APIC_VERBOSE, "Setting %d in the "
+                                       "phys_id_present_map\n",
+                                       mp_ioapics[apic].mp_apicid);
+                       physids_or(phys_id_present_map, phys_id_present_map, tmp);
+               }
+
+
+               /*
+                * We need to adjust the IRQ routing table
+                * if the ID changed.
+                */
+               if (old_id != mp_ioapics[apic].mp_apicid)
+                       for (i = 0; i < mp_irq_entries; i++)
+                               if (mp_irqs[i].mp_dstapic == old_id)
+                                       mp_irqs[i].mp_dstapic
+                                               = mp_ioapics[apic].mp_apicid;
+
+               /*
+                * Read the right value from the MPC table and
+                * write it into the ID register.
+                */
+               apic_printk(APIC_VERBOSE, KERN_INFO
+                       "...changing IO-APIC physical APIC ID to %d ...",
+                       mp_ioapics[apic].mp_apicid);
+
+               reg_00.bits.ID = mp_ioapics[apic].mp_apicid;
+               spin_lock_irqsave(&ioapic_lock, flags);
+               io_apic_write(apic, 0, reg_00.raw);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+
+               /*
+                * Sanity check
+                */
+               spin_lock_irqsave(&ioapic_lock, flags);
+               reg_00.raw = io_apic_read(apic, 0);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+               if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid)
+                       printk("could not set ID!\n");
+               else
+                       apic_printk(APIC_VERBOSE, " ok.\n");
+       }
+}
+#endif
+
+int no_timer_check __initdata;
+
+static int __init notimercheck(char *s)
+{
+       no_timer_check = 1;
+       return 1;
+}
+__setup("no_timer_check", notimercheck);
+
 /*
  * There is a nasty bug in some older SMP boards, their mptable lies
  * about the timer IRQ. We do the following to work around the situation:
@@ -1471,6 +1958,9 @@ static int __init timer_irq_works(void)
        unsigned long t1 = jiffies;
        unsigned long flags;
 
+       if (no_timer_check)
+               return 1;
+
        local_save_flags(flags);
        local_irq_enable();
        /* Let ten ticks pass... */
@@ -1531,9 +2021,11 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
        return was_pending;
 }
 
+#ifdef CONFIG_X86_64
 static int ioapic_retrigger_irq(unsigned int irq)
 {
-       struct irq_cfg *cfg = &irq_cfg[irq];
+
+       struct irq_cfg *cfg = irq_cfg(irq);
        unsigned long flags;
 
        spin_lock_irqsave(&vector_lock, flags);
@@ -1542,6 +2034,14 @@ static int ioapic_retrigger_irq(unsigned int irq)
 
        return 1;
 }
+#else
+static int ioapic_retrigger_irq(unsigned int irq)
+{
+       send_IPI_self(irq_cfg(irq)->vector);
+
+       return 1;
+}
+#endif
 
 /*
  * Level and edge triggered IO-APIC interrupts need different handling,
@@ -1580,11 +2080,11 @@ static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
  */
 static void migrate_ioapic_irq(int irq, cpumask_t mask)
 {
-       struct irq_cfg *cfg = irq_cfg + irq;
-       struct irq_desc *desc = irq_desc + irq;
+       struct irq_cfg *cfg;
+       struct irq_desc *desc;
        cpumask_t tmp, cleanup_mask;
        struct irte irte;
-       int modify_ioapic_rte = desc->status & IRQ_LEVEL;
+       int modify_ioapic_rte;
        unsigned int dest;
        unsigned long flags;
 
@@ -1598,9 +2098,12 @@ static void migrate_ioapic_irq(int irq, cpumask_t mask)
        if (assign_irq_vector(irq, mask))
                return;
 
+       cfg = irq_cfg(irq);
        cpus_and(tmp, cfg->domain, mask);
        dest = cpu_mask_to_apicid(tmp);
 
+       desc = irq_to_desc(irq);
+       modify_ioapic_rte = desc->status & IRQ_LEVEL;
        if (modify_ioapic_rte) {
                spin_lock_irqsave(&ioapic_lock, flags);
                __target_IO_APIC_irq(irq, dest, cfg->vector);
@@ -1622,18 +2125,19 @@ static void migrate_ioapic_irq(int irq, cpumask_t mask)
                cfg->move_in_progress = 0;
        }
 
-       irq_desc[irq].affinity = mask;
+       desc->affinity = mask;
 }
 
 static int migrate_irq_remapped_level(int irq)
 {
        int ret = -1;
+       struct irq_desc *desc = irq_to_desc(irq);
 
        mask_IO_APIC_irq(irq);
 
        if (io_apic_level_ack_pending(irq)) {
                /*
-                * Interrupt in progress. Migrating irq now will change the
+                * Interrupt in progress. Migrating irq now will change the
                 * vector information in the IO-APIC RTE and that will confuse
                 * the EOI broadcast performed by cpu.
                 * So, delay the irq migration to the next instance.
@@ -1643,11 +2147,11 @@ static int migrate_irq_remapped_level(int irq)
        }
 
        /* everthing is clear. we have right of way */
-       migrate_ioapic_irq(irq, irq_desc[irq].pending_mask);
+       migrate_ioapic_irq(irq, desc->pending_mask);
 
        ret = 0;
-       irq_desc[irq].status &= ~IRQ_MOVE_PENDING;
-       cpus_clear(irq_desc[irq].pending_mask);
+       desc->status &= ~IRQ_MOVE_PENDING;
+       cpus_clear(desc->pending_mask);
 
 unmask:
        unmask_IO_APIC_irq(irq);
@@ -1656,10 +2160,10 @@ unmask:
 
 static void ir_irq_migration(struct work_struct *work)
 {
-       int irq;
+       unsigned int irq;
+       struct irq_desc *desc;
 
-       for (irq = 0; irq < NR_IRQS; irq++) {
-               struct irq_desc *desc = irq_desc + irq;
+       for_each_irq_desc(irq, desc) {
                if (desc->status & IRQ_MOVE_PENDING) {
                        unsigned long flags;
 
@@ -1671,8 +2175,7 @@ static void ir_irq_migration(struct work_struct *work)
                                continue;
                        }
 
-                       desc->chip->set_affinity(irq,
-                                                irq_desc[irq].pending_mask);
+                       desc->chip->set_affinity(irq, desc->pending_mask);
                        spin_unlock_irqrestore(&desc->lock, flags);
                }
        }
@@ -1683,9 +2186,11 @@ static void ir_irq_migration(struct work_struct *work)
  */
 static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 {
-       if (irq_desc[irq].status & IRQ_LEVEL) {
-               irq_desc[irq].status |= IRQ_MOVE_PENDING;
-               irq_desc[irq].pending_mask = mask;
+       struct irq_desc *desc = irq_to_desc(irq);
+
+       if (desc->status & IRQ_LEVEL) {
+               desc->status |= IRQ_MOVE_PENDING;
+               desc->pending_mask = mask;
                migrate_irq_remapped_level(irq);
                return;
        }
@@ -1698,7 +2203,9 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
 {
        unsigned vector, me;
        ack_APIC_irq();
+#ifdef CONFIG_X86_64
        exit_idle();
+#endif
        irq_enter();
 
        me = smp_processor_id();
@@ -1707,11 +2214,12 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
                struct irq_desc *desc;
                struct irq_cfg *cfg;
                irq = __get_cpu_var(vector_irq)[vector];
-               if (irq >= NR_IRQS)
+
+               desc = irq_to_desc(irq);
+               if (!desc)
                        continue;
 
-               desc = irq_desc + irq;
-               cfg = irq_cfg + irq;
+               cfg = irq_cfg(irq);
                spin_lock(&desc->lock);
                if (!cfg->move_cleanup_count)
                        goto unlock;
@@ -1730,7 +2238,7 @@ unlock:
 
 static void irq_complete_move(unsigned int irq)
 {
-       struct irq_cfg *cfg = irq_cfg + irq;
+       struct irq_cfg *cfg = irq_cfg(irq);
        unsigned vector, me;
 
        if (likely(!cfg->move_in_progress))
@@ -1769,19 +2277,50 @@ static void ack_apic_edge(unsigned int irq)
        ack_APIC_irq();
 }
 
+atomic_t irq_mis_count;
+
 static void ack_apic_level(unsigned int irq)
 {
+#ifdef CONFIG_X86_32
+       unsigned long v;
+       int i;
+#endif
        int do_unmask_irq = 0;
 
        irq_complete_move(irq);
 #ifdef CONFIG_GENERIC_PENDING_IRQ
        /* If we are moving the irq we need to mask it */
-       if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) {
+       if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
                do_unmask_irq = 1;
                mask_IO_APIC_irq(irq);
        }
 #endif
 
+#ifdef CONFIG_X86_32
+       /*
+       * It appears there is an erratum which affects at least version 0x11
+       * of I/O APIC (that's the 82093AA and cores integrated into various
+       * chipsets).  Under certain conditions a level-triggered interrupt is
+       * erroneously delivered as edge-triggered one but the respective IRR
+       * bit gets set nevertheless.  As a result the I/O unit expects an EOI
+       * message but it will never arrive and further interrupts are blocked
+       * from the source.  The exact reason is so far unknown, but the
+       * phenomenon was observed when two consecutive interrupt requests
+       * from a given source get delivered to the same CPU and the source is
+       * temporarily disabled in between.
+       *
+       * A workaround is to simulate an EOI message manually.  We achieve it
+       * by setting the trigger mode to edge and then to level when the edge
+       * trigger mode gets detected in the TMR of a local APIC for a
+       * level-triggered interrupt.  We mask the source for the time of the
+       * operation to prevent an edge-triggered interrupt escaping meanwhile.
+       * The idea is from Manfred Spraul.  --macro
+       */
+       i = irq_cfg(irq)->vector;
+
+       v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
+#endif
+
        /*
         * We must acknowledge the irq before we move it or the acknowledge will
         * not propagate properly.
@@ -1820,31 +2359,41 @@ static void ack_apic_level(unsigned int irq)
                        move_masked_irq(irq);
                unmask_IO_APIC_irq(irq);
        }
+
+#ifdef CONFIG_X86_32
+       if (!(v & (1 << (i & 0x1f)))) {
+               atomic_inc(&irq_mis_count);
+               spin_lock(&ioapic_lock);
+               __mask_and_edge_IO_APIC_irq(irq);
+               __unmask_and_level_IO_APIC_irq(irq);
+               spin_unlock(&ioapic_lock);
+       }
+#endif
 }
 
 static struct irq_chip ioapic_chip __read_mostly = {
-       .name           = "IO-APIC",
-       .startup        = startup_ioapic_irq,
-       .mask           = mask_IO_APIC_irq,
-       .unmask         = unmask_IO_APIC_irq,
-       .ack            = ack_apic_edge,
-       .eoi            = ack_apic_level,
+       .name           = "IO-APIC",
+       .startup        = startup_ioapic_irq,
+       .mask           = mask_IO_APIC_irq,
+       .unmask         = unmask_IO_APIC_irq,
+       .ack            = ack_apic_edge,
+       .eoi            = ack_apic_level,
 #ifdef CONFIG_SMP
-       .set_affinity   = set_ioapic_affinity_irq,
+       .set_affinity   = set_ioapic_affinity_irq,
 #endif
        .retrigger      = ioapic_retrigger_irq,
 };
 
 #ifdef CONFIG_INTR_REMAP
 static struct irq_chip ir_ioapic_chip __read_mostly = {
-       .name           = "IR-IO-APIC",
-       .startup        = startup_ioapic_irq,
-       .mask           = mask_IO_APIC_irq,
-       .unmask         = unmask_IO_APIC_irq,
-       .ack            = ack_x2apic_edge,
-       .eoi            = ack_x2apic_level,
+       .name           = "IR-IO-APIC",
+       .startup        = startup_ioapic_irq,
+       .mask           = mask_IO_APIC_irq,
+       .unmask         = unmask_IO_APIC_irq,
+       .ack            = ack_x2apic_edge,
+       .eoi            = ack_x2apic_level,
 #ifdef CONFIG_SMP
-       .set_affinity   = set_ir_ioapic_affinity_irq,
+       .set_affinity   = set_ir_ioapic_affinity_irq,
 #endif
        .retrigger      = ioapic_retrigger_irq,
 };
@@ -1853,6 +2402,8 @@ static struct irq_chip ir_ioapic_chip __read_mostly = {
 static inline void init_IO_APIC_traps(void)
 {
        int irq;
+       struct irq_desc *desc;
+       struct irq_cfg *cfg;
 
        /*
         * NOTE! The local APIC isn't very good at handling
@@ -1865,8 +2416,8 @@ static inline void init_IO_APIC_traps(void)
         * Also, we've got to be careful not to trash gate
         * 0x80, because int 0x80 is hm, kind of importantish. ;)
         */
-       for (irq = 0; irq < NR_IRQS ; irq++) {
-               if (IO_APIC_IRQ(irq) && !irq_cfg[irq].vector) {
+       for_each_irq_cfg(irq, cfg) {
+               if (IO_APIC_IRQ(irq) && !cfg->vector) {
                        /*
                         * Hmm.. We don't have an entry for this,
                         * so default to an old-fashioned 8259
@@ -1874,27 +2425,33 @@ static inline void init_IO_APIC_traps(void)
                         */
                        if (irq < 16)
                                make_8259A_irq(irq);
-                       else
+                       else {
+                               desc = irq_to_desc(irq);
                                /* Strange. Oh, well.. */
-                               irq_desc[irq].chip = &no_irq_chip;
+                               desc->chip = &no_irq_chip;
+                       }
                }
        }
 }
 
-static void unmask_lapic_irq(unsigned int irq)
+/*
+ * The local APIC irq-chip implementation:
+ */
+
+static void mask_lapic_irq(unsigned int irq)
 {
        unsigned long v;
 
        v = apic_read(APIC_LVT0);
-       apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
+       apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
 }
 
-static void mask_lapic_irq(unsigned int irq)
+static void unmask_lapic_irq(unsigned int irq)
 {
        unsigned long v;
 
        v = apic_read(APIC_LVT0);
-       apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
+       apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
 }
 
 static void ack_lapic_irq (unsigned int irq)
@@ -1911,7 +2468,10 @@ static struct irq_chip lapic_chip __read_mostly = {
 
 static void lapic_register_intr(int irq)
 {
-       irq_desc[irq].status &= ~IRQ_LEVEL;
+       struct irq_desc *desc;
+
+       desc = irq_to_desc(irq);
+       desc->status &= ~IRQ_LEVEL;
        set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
                                      "edge");
 }
@@ -1919,19 +2479,19 @@ static void lapic_register_intr(int irq)
 static void __init setup_nmi(void)
 {
        /*
-        * Dirty trick to enable the NMI watchdog ...
+        * Dirty trick to enable the NMI watchdog ...
         * We put the 8259A master into AEOI mode and
         * unmask on all local APICs LVT0 as NMI.
         *
         * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
         * is from Maciej W. Rozycki - so we do not have to EOI from
         * the NMI handler or the timer interrupt.
-        */ 
-       printk(KERN_INFO "activating NMI Watchdog ...");
+        */
+       apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
 
        enable_NMI_through_LVT0();
 
-       printk(" done.\n");
+       apic_printk(APIC_VERBOSE, " done.\n");
 }
 
 /*
@@ -1948,12 +2508,17 @@ static inline void __init unlock_ExtINT_logic(void)
        unsigned char save_control, save_freq_select;
 
        pin  = find_isa_irq_pin(8, mp_INT);
+       if (pin == -1) {
+               WARN_ON_ONCE(1);
+               return;
+       }
        apic = find_isa_irq_apic(8, mp_INT);
-       if (pin == -1)
+       if (apic == -1) {
+               WARN_ON_ONCE(1);
                return;
+       }
 
        entry0 = ioapic_read_entry(apic, pin);
-
        clear_IO_APIC_pin(apic, pin);
 
        memset(&entry1, 0, sizeof(entry1));
@@ -1988,23 +2553,38 @@ static inline void __init unlock_ExtINT_logic(void)
        ioapic_write_entry(apic, pin, entry0);
 }
 
+static int disable_timer_pin_1 __initdata;
+/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
+static int __init disable_timer_pin_setup(char *arg)
+{
+       disable_timer_pin_1 = 1;
+       return 0;
+}
+early_param("disable_timer_pin_1", disable_timer_pin_setup);
+
+int timer_through_8259 __initdata;
+
 /*
  * This code may look a bit paranoid, but it's supposed to cooperate with
  * a wide range of boards and BIOS bugs.  Fortunately only the timer IRQ
  * is so screwy.  Thanks to Brian Perkins for testing/hacking this beast
  * fanatically on his truly buggy board.
  *
- * FIXME: really need to revamp this for modern platforms only.
+ * FIXME: really need to revamp this for all platforms.
  */
 static inline void __init check_timer(void)
 {
-       struct irq_cfg *cfg = irq_cfg + 0;
+       struct irq_cfg *cfg = irq_cfg(0);
        int apic1, pin1, apic2, pin2;
        unsigned long flags;
+       unsigned int ver;
        int no_pin1 = 0;
 
        local_irq_save(flags);
 
+       ver = apic_read(APIC_LVR);
+       ver = GET_APIC_VERSION(ver);
+
        /*
         * get/set the timer IRQ vector:
         */
@@ -2013,10 +2593,18 @@ static inline void __init check_timer(void)
 
        /*
         * As IRQ0 is to be enabled in the 8259A, the virtual
-        * wire has to be disabled in the local APIC.
+        * wire has to be disabled in the local APIC.  Also
+        * timer interrupts need to be acknowledged manually in
+        * the 8259A for the i82489DX when using the NMI
+        * watchdog as that APIC treats NMIs as level-triggered.
+        * The AEOI mode will finish them in the 8259A
+        * automatically.
         */
        apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
        init_8259A(1);
+#ifdef CONFIG_X86_32
+       timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
+#endif
 
        pin1  = find_isa_irq_pin(0, mp_INT);
        apic1 = find_isa_irq_apic(0, mp_INT);
@@ -2035,8 +2623,10 @@ static inline void __init check_timer(void)
         * 8259A.
         */
        if (pin1 == -1) {
+#ifdef CONFIG_INTR_REMAP
                if (intr_remapping_enabled)
                        panic("BIOS bug: timer not connected to IO-APIC");
+#endif
                pin1 = pin2;
                apic1 = apic2;
                no_pin1 = 1;
@@ -2054,7 +2644,7 @@ static inline void __init check_timer(void)
                        setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
                }
                unmask_IO_APIC_irq(0);
-               if (!no_timer_check && timer_irq_works()) {
+               if (timer_irq_works()) {
                        if (nmi_watchdog == NMI_IO_APIC) {
                                setup_nmi();
                                enable_8259A_irq(0);
@@ -2063,8 +2653,10 @@ static inline void __init check_timer(void)
                                clear_IO_APIC_pin(0, pin1);
                        goto out;
                }
+#ifdef CONFIG_INTR_REMAP
                if (intr_remapping_enabled)
                        panic("timer doesn't work through Interrupt-remapped IO-APIC");
+#endif
                clear_IO_APIC_pin(apic1, pin1);
                if (!no_pin1)
                        apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
@@ -2104,6 +2696,9 @@ static inline void __init check_timer(void)
                            "through the IO-APIC - disabling NMI Watchdog!\n");
                nmi_watchdog = NMI_NONE;
        }
+#ifdef CONFIG_X86_32
+       timer_ack = 0;
+#endif
 
        apic_printk(APIC_QUIET, KERN_INFO
                    "...trying to set up timer as Virtual Wire IRQ...\n");
@@ -2140,13 +2735,6 @@ out:
        local_irq_restore(flags);
 }
 
-static int __init notimercheck(char *s)
-{
-       no_timer_check = 1;
-       return 1;
-}
-__setup("no_timer_check", notimercheck);
-
 /*
  * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
  * to devices.  However there may be an I/O APIC pin available for
@@ -2164,25 +2752,49 @@ __setup("no_timer_check", notimercheck);
  * the I/O APIC in all cases now.  No actual device should request
  * it anyway.  --macro
  */
-#define PIC_IRQS       (1<<2)
+#define PIC_IRQS       (1 << PIC_CASCADE_IR)
 
 void __init setup_IO_APIC(void)
 {
 
+#ifdef CONFIG_X86_32
+       enable_IO_APIC();
+#else
        /*
         * calling enable_IO_APIC() is moved to setup_local_APIC for BP
         */
+#endif
 
        io_apic_irqs = ~PIC_IRQS;
 
        apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
-
+       /*
+         * Set up IO-APIC IRQ routing.
+         */
+#ifdef CONFIG_X86_32
+       if (!acpi_ioapic)
+               setup_ioapic_ids_from_mpc();
+#endif
        sync_Arb_IDs();
        setup_IO_APIC_irqs();
        init_IO_APIC_traps();
        check_timer();
 }
 
+/*
+ *      Called after all the initialization is done. If we didnt find any
+ *      APIC bugs then we can allow the modify fast path
+ */
+
+static int __init io_apic_bug_finalize(void)
+{
+       if (sis_apic_bug == -1)
+               sis_apic_bug = 0;
+       return 0;
+}
+
+late_initcall(io_apic_bug_finalize);
+
 struct sysfs_ioapic_data {
        struct sys_device dev;
        struct IO_APIC_route_entry entry[0];
@@ -2270,32 +2882,51 @@ device_initcall(ioapic_init_sysfs);
 /*
  * Dynamic irq allocate and deallocation
  */
-int create_irq(void)
+unsigned int create_irq_nr(unsigned int irq_want)
 {
        /* Allocate an unused irq */
-       int irq;
-       int new;
+       unsigned int irq;
+       unsigned int new;
        unsigned long flags;
+       struct irq_cfg *cfg_new;
+
+       irq_want = nr_irqs - 1;
 
-       irq = -ENOSPC;
+       irq = 0;
        spin_lock_irqsave(&vector_lock, flags);
-       for (new = (NR_IRQS - 1); new >= 0; new--) {
+       for (new = irq_want; new > 0; new--) {
                if (platform_legacy_irq(new))
                        continue;
-               if (irq_cfg[new].vector != 0)
+               cfg_new = irq_cfg(new);
+               if (cfg_new && cfg_new->vector != 0)
                        continue;
+               /* check if need to create one */
+               if (!cfg_new)
+                       cfg_new = irq_cfg_alloc(new);
                if (__assign_irq_vector(new, TARGET_CPUS) == 0)
                        irq = new;
                break;
        }
        spin_unlock_irqrestore(&vector_lock, flags);
 
-       if (irq >= 0) {
+       if (irq > 0) {
                dynamic_irq_init(irq);
        }
        return irq;
 }
 
+int create_irq(void)
+{
+       int irq;
+
+       irq = create_irq_nr(nr_irqs - 1);
+
+       if (irq == 0)
+               irq = -1;
+
+       return irq;
+}
+
 void destroy_irq(unsigned int irq)
 {
        unsigned long flags;
@@ -2316,7 +2947,7 @@ void destroy_irq(unsigned int irq)
 #ifdef CONFIG_PCI_MSI
 static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
 {
-       struct irq_cfg *cfg = irq_cfg + irq;
+       struct irq_cfg *cfg;
        int err;
        unsigned dest;
        cpumask_t tmp;
@@ -2326,6 +2957,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
        if (err)
                return err;
 
+       cfg = irq_cfg(irq);
        cpus_and(tmp, cfg->domain, tmp);
        dest = cpu_mask_to_apicid(tmp);
 
@@ -2383,10 +3015,11 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
 #ifdef CONFIG_SMP
 static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 {
-       struct irq_cfg *cfg = irq_cfg + irq;
+       struct irq_cfg *cfg;
        struct msi_msg msg;
        unsigned int dest;
        cpumask_t tmp;
+       struct irq_desc *desc;
 
        cpus_and(tmp, mask, cpu_online_map);
        if (cpus_empty(tmp))
@@ -2395,6 +3028,7 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
        if (assign_irq_vector(irq, mask))
                return;
 
+       cfg = irq_cfg(irq);
        cpus_and(tmp, cfg->domain, mask);
        dest = cpu_mask_to_apicid(tmp);
 
@@ -2406,7 +3040,8 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
        msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
        write_msi_msg(irq, &msg);
-       irq_desc[irq].affinity = mask;
+       desc = irq_to_desc(irq);
+       desc->affinity = mask;
 }
 
 #ifdef CONFIG_INTR_REMAP
@@ -2416,10 +3051,11 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
  */
 static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 {
-       struct irq_cfg *cfg = irq_cfg + irq;
+       struct irq_cfg *cfg;
        unsigned int dest;
        cpumask_t tmp, cleanup_mask;
        struct irte irte;
+       struct irq_desc *desc;
 
        cpus_and(tmp, mask, cpu_online_map);
        if (cpus_empty(tmp))
@@ -2431,6 +3067,7 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
        if (assign_irq_vector(irq, mask))
                return;
 
+       cfg = irq_cfg(irq);
        cpus_and(tmp, cfg->domain, mask);
        dest = cpu_mask_to_apicid(tmp);
 
@@ -2454,7 +3091,8 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
                cfg->move_in_progress = 0;
        }
 
-       irq_desc[irq].affinity = mask;
+       desc = irq_to_desc(irq);
+       desc->affinity = mask;
 }
 #endif
 #endif /* CONFIG_SMP */
@@ -2507,7 +3145,7 @@ static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
        if (index < 0) {
                printk(KERN_ERR
                       "Unable to allocate %d IRTE for PCI %s\n", nvec,
-                       pci_name(dev));
+                      pci_name(dev));
                return -ENOSPC;
        }
        return index;
@@ -2528,7 +3166,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
 
 #ifdef CONFIG_INTR_REMAP
        if (irq_remapped(irq)) {
-               struct irq_desc *desc = irq_desc + irq;
+               struct irq_desc *desc = irq_to_desc(irq);
                /*
                 * irq migration in process context
                 */
@@ -2538,16 +3176,34 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
 #endif
                set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
 
+       dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq);
+
        return 0;
 }
 
+static unsigned int build_irq_for_pci_dev(struct pci_dev *dev)
+{
+       unsigned int irq;
+
+       irq = dev->bus->number;
+       irq <<= 8;
+       irq |= dev->devfn;
+       irq <<= 12;
+
+       return irq;
+}
+
 int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
 {
-       int irq, ret;
+       unsigned int irq;
+       int ret;
+       unsigned int irq_want;
 
-       irq = create_irq();
-       if (irq < 0)
-               return irq;
+       irq_want = build_irq_for_pci_dev(dev) + 0x100;
+
+       irq = create_irq_nr(irq_want);
+       if (irq == 0)
+               return -1;
 
 #ifdef CONFIG_INTR_REMAP
        if (!intr_remapping_enabled)
@@ -2574,18 +3230,22 @@ error:
 
 int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 {
-       int irq, ret, sub_handle;
+       unsigned int irq;
+       int ret, sub_handle;
        struct msi_desc *desc;
+       unsigned int irq_want;
+
 #ifdef CONFIG_INTR_REMAP
        struct intel_iommu *iommu = 0;
        int index = 0;
 #endif
 
+       irq_want = build_irq_for_pci_dev(dev) + 0x100;
        sub_handle = 0;
        list_for_each_entry(desc, &dev->msi_list, list) {
-               irq = create_irq();
-               if (irq < 0)
-                       return irq;
+               irq = create_irq_nr(irq_want--);
+               if (irq == 0)
+                       return -1;
 #ifdef CONFIG_INTR_REMAP
                if (!intr_remapping_enabled)
                        goto no_ir;
@@ -2636,10 +3296,11 @@ void arch_teardown_msi_irq(unsigned int irq)
 #ifdef CONFIG_SMP
 static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
 {
-       struct irq_cfg *cfg = irq_cfg + irq;
+       struct irq_cfg *cfg;
        struct msi_msg msg;
        unsigned int dest;
        cpumask_t tmp;
+       struct irq_desc *desc;
 
        cpus_and(tmp, mask, cpu_online_map);
        if (cpus_empty(tmp))
@@ -2648,6 +3309,7 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
        if (assign_irq_vector(irq, mask))
                return;
 
+       cfg = irq_cfg(irq);
        cpus_and(tmp, cfg->domain, mask);
        dest = cpu_mask_to_apicid(tmp);
 
@@ -2659,7 +3321,8 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
        msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
        dmar_msi_write(irq, &msg);
-       irq_desc[irq].affinity = mask;
+       desc = irq_to_desc(irq);
+       desc->affinity = mask;
 }
 #endif /* CONFIG_SMP */
 
@@ -2689,6 +3352,69 @@ int arch_setup_dmar_msi(unsigned int irq)
 }
 #endif
 
+#ifdef CONFIG_HPET_TIMER
+
+#ifdef CONFIG_SMP
+static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
+{
+       struct irq_cfg *cfg;
+       struct irq_desc *desc;
+       struct msi_msg msg;
+       unsigned int dest;
+       cpumask_t tmp;
+
+       cpus_and(tmp, mask, cpu_online_map);
+       if (cpus_empty(tmp))
+               return;
+
+       if (assign_irq_vector(irq, mask))
+               return;
+
+       cfg = irq_cfg(irq);
+       cpus_and(tmp, cfg->domain, mask);
+       dest = cpu_mask_to_apicid(tmp);
+
+       hpet_msi_read(irq, &msg);
+
+       msg.data &= ~MSI_DATA_VECTOR_MASK;
+       msg.data |= MSI_DATA_VECTOR(cfg->vector);
+       msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+       msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+
+       hpet_msi_write(irq, &msg);
+       desc = irq_to_desc(irq);
+       desc->affinity = mask;
+}
+#endif /* CONFIG_SMP */
+
+struct irq_chip hpet_msi_type = {
+       .name = "HPET_MSI",
+       .unmask = hpet_msi_unmask,
+       .mask = hpet_msi_mask,
+       .ack = ack_apic_edge,
+#ifdef CONFIG_SMP
+       .set_affinity = hpet_msi_set_affinity,
+#endif
+       .retrigger = ioapic_retrigger_irq,
+};
+
+int arch_setup_hpet_msi(unsigned int irq)
+{
+       int ret;
+       struct msi_msg msg;
+
+       ret = msi_compose_msg(NULL, irq, &msg);
+       if (ret < 0)
+               return ret;
+
+       hpet_msi_write(irq, &msg);
+       set_irq_chip_and_handler_name(irq, &hpet_msi_type, handle_edge_irq,
+               "edge");
+
+       return 0;
+}
+#endif
+
 #endif /* CONFIG_PCI_MSI */
 /*
  * Hypertransport interrupt support
@@ -2713,9 +3439,10 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
 
 static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
 {
-       struct irq_cfg *cfg = irq_cfg + irq;
+       struct irq_cfg *cfg;
        unsigned int dest;
        cpumask_t tmp;
+       struct irq_desc *desc;
 
        cpus_and(tmp, mask, cpu_online_map);
        if (cpus_empty(tmp))
@@ -2724,11 +3451,13 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
        if (assign_irq_vector(irq, mask))
                return;
 
+       cfg = irq_cfg(irq);
        cpus_and(tmp, cfg->domain, mask);
        dest = cpu_mask_to_apicid(tmp);
 
        target_ht_irq(irq, dest, cfg->vector);
-       irq_desc[irq].affinity = mask;
+       desc = irq_to_desc(irq);
+       desc->affinity = mask;
 }
 #endif
 
@@ -2745,7 +3474,7 @@ static struct irq_chip ht_irq_chip = {
 
 int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 {
-       struct irq_cfg *cfg = irq_cfg + irq;
+       struct irq_cfg *cfg;
        int err;
        cpumask_t tmp;
 
@@ -2755,6 +3484,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
                struct ht_irq_msg msg;
                unsigned dest;
 
+               cfg = irq_cfg(irq);
                cpus_and(tmp, cfg->domain, tmp);
                dest = cpu_mask_to_apicid(tmp);
 
@@ -2777,20 +3507,196 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 
                set_irq_chip_and_handler_name(irq, &ht_irq_chip,
                                              handle_edge_irq, "edge");
+
+               dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq);
        }
        return err;
 }
 #endif /* CONFIG_HT_IRQ */
 
+#ifdef CONFIG_X86_64
+/*
+ * Re-target the irq to the specified CPU and enable the specified MMR located
+ * on the specified blade to allow the sending of MSIs to the specified CPU.
+ */
+int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
+                      unsigned long mmr_offset)
+{
+       const cpumask_t *eligible_cpu = get_cpu_mask(cpu);
+       struct irq_cfg *cfg;
+       int mmr_pnode;
+       unsigned long mmr_value;
+       struct uv_IO_APIC_route_entry *entry;
+       unsigned long flags;
+       int err;
+
+       err = assign_irq_vector(irq, *eligible_cpu);
+       if (err != 0)
+               return err;
+
+       spin_lock_irqsave(&vector_lock, flags);
+       set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
+                                     irq_name);
+       spin_unlock_irqrestore(&vector_lock, flags);
+
+       cfg = irq_cfg(irq);
+
+       mmr_value = 0;
+       entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
+       BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
+
+       entry->vector = cfg->vector;
+       entry->delivery_mode = INT_DELIVERY_MODE;
+       entry->dest_mode = INT_DEST_MODE;
+       entry->polarity = 0;
+       entry->trigger = 0;
+       entry->mask = 0;
+       entry->dest = cpu_mask_to_apicid(*eligible_cpu);
+
+       mmr_pnode = uv_blade_to_pnode(mmr_blade);
+       uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+
+       return irq;
+}
+
+/*
+ * Disable the specified MMR located on the specified blade so that MSIs are
+ * longer allowed to be sent.
+ */
+void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset)
+{
+       unsigned long mmr_value;
+       struct uv_IO_APIC_route_entry *entry;
+       int mmr_pnode;
+
+       mmr_value = 0;
+       entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
+       BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
+
+       entry->mask = 1;
+
+       mmr_pnode = uv_blade_to_pnode(mmr_blade);
+       uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+}
+#endif