nv-tegra.nvidia Code Review - linux-2.6.git/commitdiff
Merge branch 'x86-64'
author Linus Torvalds <torvalds@g5.osdl.org>
Mon, 26 Jun 2006 17:51:09 +0000 (10:51 -0700)
committer Linus Torvalds <torvalds@g5.osdl.org>
Mon, 26 Jun 2006 17:51:09 +0000 (10:51 -0700)
* x86-64: (83 commits)
  [PATCH] x86_64: x86_64 stack usage debugging
  [PATCH] x86_64: (resend) x86_64 stack overflow debugging
  [PATCH] x86_64: msi_apic.c build fix
  [PATCH] x86_64: i386/x86-64 Add nmi watchdog support for new Intel CPUs
  [PATCH] x86_64: Avoid broadcasting NMI IPIs
  [PATCH] x86_64: fix apic error on bootup
  [PATCH] x86_64: enlarge window for stack growth
  [PATCH] x86_64: Minor string functions optimizations
  [PATCH] x86_64: Move export symbols to their C functions
  [PATCH] x86_64: Standardize i386/x86_64 handling of NMI_VECTOR
  [PATCH] x86_64: Fix modular pc speaker
  [PATCH] x86_64: remove sys32_ni_syscall()
  [PATCH] x86_64: Do not use -ffunction-sections for modules
  [PATCH] x86_64: Add cpu_relax to apic_wait_icr_idle
  [PATCH] x86_64: adjust kstack_depth_to_print default
  [PATCH] i386/x86-64: adjust /proc/interrupts column headings
  [PATCH] x86_64: Fix race in cpu_local_* on preemptible kernels
  [PATCH] x86_64: Fix fast check in safe_smp_processor_id
  [PATCH] x86_64: x86_64 setup.c - printing cmp related boottime information
  [PATCH] i386/x86-64/ia64: Move polling flag into thread_info_status
  ...

Manual resolve of trivial conflict in arch/i386/kernel/Makefile

147 files changed:
Documentation/x86_64/boot-options.txt
arch/i386/Kconfig
arch/i386/boot/Makefile
arch/i386/boot/compressed/misc.c
arch/i386/kernel/Makefile
arch/i386/kernel/alternative.c
arch/i386/kernel/apic.c
arch/i386/kernel/apm.c
arch/i386/kernel/cpu/amd.c
arch/i386/kernel/cpu/intel.c
arch/i386/kernel/cpu/intel_cacheinfo.c
arch/i386/kernel/crash.c
arch/i386/kernel/entry.S
arch/i386/kernel/io_apic.c
arch/i386/kernel/irq.c
arch/i386/kernel/nmi.c
arch/i386/kernel/process.c
arch/i386/kernel/smp.c
arch/i386/kernel/smpboot.c
arch/i386/kernel/traps.c
arch/i386/kernel/vmlinux.lds.S
arch/i386/oprofile/op_model_athlon.c
arch/i386/oprofile/op_model_p4.c
arch/i386/oprofile/op_model_ppro.c
arch/ia64/kernel/process.c
arch/x86_64/Kconfig
arch/x86_64/Kconfig.debug
arch/x86_64/Makefile
arch/x86_64/boot/Makefile
arch/x86_64/boot/compressed/misc.c
arch/x86_64/boot/tools/build.c
arch/x86_64/defconfig
arch/x86_64/ia32/fpu32.c
arch/x86_64/ia32/ia32_signal.c
arch/x86_64/ia32/ia32entry.S
arch/x86_64/ia32/ptrace32.c
arch/x86_64/ia32/sys_ia32.c
arch/x86_64/kernel/Makefile
arch/x86_64/kernel/aperture.c
arch/x86_64/kernel/apic.c
arch/x86_64/kernel/crash.c
arch/x86_64/kernel/e820.c
arch/x86_64/kernel/entry.S
arch/x86_64/kernel/genapic_flat.c
arch/x86_64/kernel/head64.c
arch/x86_64/kernel/i8259.c
arch/x86_64/kernel/io_apic.c
arch/x86_64/kernel/irq.c
arch/x86_64/kernel/k8.c [new file with mode: 0644]
arch/x86_64/kernel/mce.c
arch/x86_64/kernel/mce_amd.c
arch/x86_64/kernel/module.c
arch/x86_64/kernel/nmi.c
arch/x86_64/kernel/pci-calgary.c [new file with mode: 0644]
arch/x86_64/kernel/pci-dma.c
arch/x86_64/kernel/pci-gart.c
arch/x86_64/kernel/pci-nommu.c
arch/x86_64/kernel/pci-swiotlb.c
arch/x86_64/kernel/process.c
arch/x86_64/kernel/reboot.c
arch/x86_64/kernel/setup.c
arch/x86_64/kernel/setup64.c
arch/x86_64/kernel/signal.c
arch/x86_64/kernel/smp.c
arch/x86_64/kernel/smpboot.c
arch/x86_64/kernel/tce.c [new file with mode: 0644]
arch/x86_64/kernel/time.c
arch/x86_64/kernel/traps.c
arch/x86_64/kernel/vmlinux.lds.S
arch/x86_64/kernel/vsyscall.c
arch/x86_64/kernel/x8664_ksyms.c
arch/x86_64/lib/csum-partial.c
arch/x86_64/lib/csum-wrappers.c
arch/x86_64/lib/delay.c
arch/x86_64/lib/memmove.c
arch/x86_64/lib/usercopy.c
arch/x86_64/mm/fault.c
arch/x86_64/mm/init.c
arch/x86_64/mm/ioremap.c
arch/x86_64/pci/k8-bus.c
drivers/acpi/processor_idle.c
drivers/char/agp/Kconfig
drivers/char/agp/amd64-agp.c
drivers/pci/msi-apic.c
drivers/scsi/aacraid/comminit.c
fs/compat.c
include/asm-i386/alternative.h
include/asm-i386/apic.h
include/asm-i386/cpufeature.h
include/asm-i386/dwarf2.h [new file with mode: 0644]
include/asm-i386/hw_irq.h
include/asm-i386/intel_arch_perfmon.h [new file with mode: 0644]
include/asm-i386/k8.h [new file with mode: 0644]
include/asm-i386/local.h
include/asm-i386/mach-default/mach_ipi.h
include/asm-i386/nmi.h
include/asm-i386/processor.h
include/asm-i386/thread_info.h
include/asm-i386/unwind.h [new file with mode: 0644]
include/asm-ia64/thread_info.h
include/asm-x86_64/alternative.h [new file with mode: 0644]
include/asm-x86_64/apic.h
include/asm-x86_64/atomic.h
include/asm-x86_64/bitops.h
include/asm-x86_64/calgary.h [new file with mode: 0644]
include/asm-x86_64/cpufeature.h
include/asm-x86_64/dma-mapping.h
include/asm-x86_64/dma.h
include/asm-x86_64/gart-mapping.h [deleted file]
include/asm-x86_64/hpet.h
include/asm-x86_64/hw_irq.h
include/asm-x86_64/ia32_unistd.h
include/asm-x86_64/intel_arch_perfmon.h [new file with mode: 0644]
include/asm-x86_64/k8.h [new file with mode: 0644]
include/asm-x86_64/local.h
include/asm-x86_64/mce.h
include/asm-x86_64/mutex.h
include/asm-x86_64/nmi.h
include/asm-x86_64/pci.h
include/asm-x86_64/pgtable.h
include/asm-x86_64/processor.h
include/asm-x86_64/proto.h
include/asm-x86_64/rwlock.h
include/asm-x86_64/semaphore.h
include/asm-x86_64/smp.h
include/asm-x86_64/spinlock.h
include/asm-x86_64/string.h
include/asm-x86_64/system.h
include/asm-x86_64/tce.h [new file with mode: 0644]
include/asm-x86_64/thread_info.h
include/asm-x86_64/topology.h
include/asm-x86_64/unwind.h [new file with mode: 0644]
include/linux/bitmap.h
include/linux/compat.h
include/linux/kernel.h
include/linux/module.h
include/linux/sysctl.h
include/linux/time.h
include/linux/unwind.h [new file with mode: 0644]
init/Kconfig
init/main.c
kernel/Makefile
kernel/module.c
kernel/sched.c
kernel/sysctl.c
kernel/unwind.c [new file with mode: 0644]
lib/Kconfig.debug

index f2cd6ef53ff33e92f6545a43a7f2498c0582b107..6887d44d266188c9d5d17fc267b2441cc82d8e39 100644 (file)
@@ -205,6 +205,27 @@ IOMMU
   pages  Prereserve that many 128K pages for the software IO bounce buffering.
   force  Force all IO through the software TLB.
 
+  calgary=[64k,128k,256k,512k,1M,2M,4M,8M]
+  calgary=[translate_empty_slots]
+  calgary=[disable=<PCI bus number>]
+
+    64k,...,8M - Set the size of each PCI slot's translation table
+    when using the Calgary IOMMU. This is the size of the translation
+    table itself in main memory. The smallest table, 64k, covers an IO
+    space of 32MB; the largest, 8MB table, can cover an IO space of
+    4GB. Normally the kernel will make the right choice by itself.
+
+    translate_empty_slots - Enable translation even on slots that have
+    no devices attached to them, in case a device will be hotplugged
+    in the future.
+
+    disable=<PCI bus number> - Disable translation on a given PHB. For
+    example, the built-in graphics adapter resides on the first bridge
+    (PCI bus number 0); if translation (isolation) is enabled on this
+    bridge, X servers that access the hardware directly from user
+    space might stop working. Use this option if you have devices that
+    are accessed from userspace directly on some PCI host bridge.
+
 Debugging
 
   oops=panic Always panic on oopses. Default is to just kill the process,
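 
For illustration, a boot line using the new Calgary options might look as
follows (hypothetical values: the bus number to disable depends on the
machine's bridge layout, and passing several calgary= options on one
command line is an assumption here, not something this patch documents):

    calgary=256k calgary=translate_empty_slots calgary=disable=0
 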
index 374fb50608a0d6a41b8db64ff20809fdb7e332d7..f3eaf22f273df148cba00f3264c4af42e9286319 100644 (file)
@@ -328,6 +328,15 @@ config X86_MCE_P4THERMAL
          Enabling this feature will cause a message to be printed when the P4
          enters thermal throttling.
 
+config VM86
+       default y
+       bool "Enable VM86 support" if EMBEDDED
+       help
+          This option is required by programs like DOSEMU to run 16-bit legacy
+          code on X86 processors. It may also be needed by software like
+          XFree86 to initialize some video cards via BIOS. Disabling this
+          option saves about 6k.
+
 config TOSHIBA
        tristate "Toshiba Laptop support"
        ---help---
@@ -1068,6 +1077,10 @@ config SCx200HR_TIMER
          processor goes idle (as is done by the scheduler).  The
          other workaround is idle=poll boot option.
 
+config K8_NB
+       def_bool y
+       depends on AGP_AMD64
+
 source "drivers/pcmcia/Kconfig"
 
 source "drivers/pci/hotplug/Kconfig"
index 33e55476381be9a5954ab616c541955d933fb841..e9794662606458225b592cd39c36fc4056b52fc3 100644 (file)
@@ -109,8 +109,13 @@ fdimage288: $(BOOTIMAGE) $(obj)/mtools.conf
 isoimage: $(BOOTIMAGE)
        -rm -rf $(obj)/isoimage
        mkdir $(obj)/isoimage
-       cp `echo /usr/lib*/syslinux/isolinux.bin | awk '{ print $1; }'` \
-               $(obj)/isoimage
+       for i in lib lib64 share end ; do \
+               if [ -f /usr/$$i/syslinux/isolinux.bin ] ; then \
+                       cp /usr/$$i/syslinux/isolinux.bin $(obj)/isoimage ; \
+                       break ; \
+               fi ; \
+               if [ $$i = end ] ; then exit 1 ; fi ; \
+       done
        cp $(BOOTIMAGE) $(obj)/isoimage/linux
        echo '$(image_cmdline)' > $(obj)/isoimage/isolinux.cfg
        if [ -f '$(FDINITRD)' ] ; then \
index f19f3a7492a5699703c16aab29b26537cf91c32e..b2ccd543410d51314a38f75048d71306d005a969 100644 (file)
 
 #undef memset
 #undef memcpy
-
-/*
- * Why do we do this? Don't ask me..
- *
- * Incomprehensible are the ways of bootloaders.
- */
-static void* memset(void *, int, size_t);
-static void* memcpy(void *, __const void *, size_t);
 #define memzero(s, n)     memset ((s), 0, (n))
 
 typedef unsigned char  uch;
@@ -93,7 +85,7 @@ static unsigned char *real_mode; /* Pointer to real-mode data */
 #endif
 #define RM_SCREEN_INFO (*(struct screen_info *)(real_mode+0))
 
-extern char input_data[];
+extern unsigned char input_data[];
 extern int input_len;
 
 static long bytes_out = 0;
@@ -103,6 +95,9 @@ static unsigned long output_ptr = 0;
 static void *malloc(int size);
 static void free(void *where);
 
+static void *memset(void *s, int c, unsigned n);
+static void *memcpy(void *dest, const void *src, unsigned n);
+
 static void putstr(const char *);
 
 extern int end;
@@ -205,7 +200,7 @@ static void putstr(const char *s)
        outb_p(0xff & (pos >> 1), vidport+1);
 }
 
-static void* memset(void* s, int c, size_t n)
+static void* memset(void* s, int c, unsigned n)
 {
        int i;
        char *ss = (char*)s;
@@ -214,14 +209,13 @@ static void* memset(void* s, int c, size_t n)
        return s;
 }
 
-static void* memcpy(void* __dest, __const void* __src,
-                           size_t __n)
+static void* memcpy(void* dest, const void* src, unsigned n)
 {
        int i;
-       char *d = (char *)__dest, *s = (char *)__src;
+       char *d = (char *)dest, *s = (char *)src;
 
-       for (i=0;i<__n;i++) d[i] = s[i];
-       return __dest;
+       for (i=0;i<n;i++) d[i] = s[i];
+       return dest;
 }
 
 /* ===========================================================================
@@ -309,7 +303,7 @@ static void setup_normal_output_buffer(void)
 #else
        if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < 1024) error("Less than 2MB of memory");
 #endif
-       output_data = (char *)__PHYSICAL_START; /* Normally Points to 1M */
+       output_data = (unsigned char *)__PHYSICAL_START; /* Normally Points to 1M */
        free_mem_end_ptr = (long)real_mode;
 }
 
@@ -324,11 +318,9 @@ static void setup_output_buffer_if_we_run_high(struct moveparams *mv)
 #ifdef STANDARD_MEMORY_BIOS_CALL
        if (RM_EXT_MEM_K < (3*1024)) error("Less than 4MB of memory");
 #else
-       if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) <
-                       (3*1024))
-               error("Less than 4MB of memory");
+       if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < (3*1024)) error("Less than 4MB of memory");
 #endif 
-       mv->low_buffer_start = output_data = (char *)LOW_BUFFER_START;
+       mv->low_buffer_start = output_data = (unsigned char *)LOW_BUFFER_START;
        low_buffer_end = ((unsigned int)real_mode > LOW_BUFFER_MAX
          ? LOW_BUFFER_MAX : (unsigned int)real_mode) & ~0xfff;
        low_buffer_size = low_buffer_end - LOW_BUFFER_START;
index 0fac85df64f1d51f4274aefd4cf3ff750427cc08..5e70c2fb273ae0a814f7148f61522a6f9040e02f 100644 (file)
@@ -37,6 +37,7 @@ obj-$(CONFIG_DOUBLEFAULT)     += doublefault.o
 obj-$(CONFIG_VM86)             += vm86.o
 obj-$(CONFIG_EARLY_PRINTK)     += early_printk.o
 obj-$(CONFIG_HPET_TIMER)       += hpet.o
+obj-$(CONFIG_K8_NB)            += k8.o
 
 EXTRA_AFLAGS   := -traditional
 
@@ -76,3 +77,6 @@ SYSCFLAGS_vsyscall-syms.o = -r
 $(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \
                        $(obj)/vsyscall-sysenter.o $(obj)/vsyscall-note.o FORCE
        $(call if_changed,syscall)
+
+k8-y                      += ../../x86_64/kernel/k8.o
+
index 5cbd6f99fb2afb53c8c1c4835df28ae86fc883ec..50eb0e03777e4b1a7f0fe564f507b6ccfe358f18 100644 (file)
@@ -4,27 +4,41 @@
 #include <asm/alternative.h>
 #include <asm/sections.h>
 
-#define DEBUG 0
-#if DEBUG
-# define DPRINTK(fmt, args...) printk(fmt, args)
-#else
-# define DPRINTK(fmt, args...)
-#endif
+static int no_replacement    = 0;
+static int smp_alt_once      = 0;
+static int debug_alternative = 0;
+
+static int __init noreplacement_setup(char *s)
+{
+       no_replacement = 1;
+       return 1;
+}
+static int __init bootonly(char *str)
+{
+       smp_alt_once = 1;
+       return 1;
+}
+static int __init debug_alt(char *str)
+{
+       debug_alternative = 1;
+       return 1;
+}
 
+__setup("noreplacement", noreplacement_setup);
+__setup("smp-alt-boot", bootonly);
+__setup("debug-alternative", debug_alt);
+
+#define DPRINTK(fmt, args...) if (debug_alternative) \
+       printk(KERN_DEBUG fmt, args)
+
+#ifdef GENERIC_NOP1
 /* Use inline assembly to define this because the nops are defined
    as inline assembly strings in the include files and we cannot
    get them easily into strings. */
 asm("\t.data\nintelnops: "
        GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
        GENERIC_NOP7 GENERIC_NOP8);
-asm("\t.data\nk8nops: "
-       K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
-       K8_NOP7 K8_NOP8);
-asm("\t.data\nk7nops: "
-       K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
-       K7_NOP7 K7_NOP8);
-
-extern unsigned char intelnops[], k8nops[], k7nops[];
+extern unsigned char intelnops[];
 static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
        NULL,
        intelnops,
@@ -36,6 +50,13 @@ static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
        intelnops + 1 + 2 + 3 + 4 + 5 + 6,
        intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
 };
+#endif
+
+#ifdef K8_NOP1
+asm("\t.data\nk8nops: "
+       K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
+       K8_NOP7 K8_NOP8);
+extern unsigned char k8nops[];
 static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
        NULL,
        k8nops,
@@ -47,6 +68,13 @@ static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
        k8nops + 1 + 2 + 3 + 4 + 5 + 6,
        k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
 };
+#endif
+
+#ifdef K7_NOP1
+asm("\t.data\nk7nops: "
+       K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
+       K7_NOP7 K7_NOP8);
+extern unsigned char k7nops[];
 static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
        NULL,
        k7nops,
@@ -58,6 +86,18 @@ static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
        k7nops + 1 + 2 + 3 + 4 + 5 + 6,
        k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
 };
+#endif
+
+#ifdef CONFIG_X86_64
+
+extern char __vsyscall_0;
+static inline unsigned char** find_nop_table(void)
+{
+       return k8_nops;
+}
+
+#else /* CONFIG_X86_64 */
+
 static struct nop {
        int cpuid;
        unsigned char **noptable;
@@ -67,14 +107,6 @@ static struct nop {
        { -1, NULL }
 };
 
-
-extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
-extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[];
-extern u8 *__smp_locks[], *__smp_locks_end[];
-
-extern u8 __smp_alt_begin[], __smp_alt_end[];
-
-
 static unsigned char** find_nop_table(void)
 {
        unsigned char **noptable = intel_nops;
@@ -89,6 +121,14 @@ static unsigned char** find_nop_table(void)
        return noptable;
 }
 
+#endif /* CONFIG_X86_64 */
+
+extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
+extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[];
+extern u8 *__smp_locks[], *__smp_locks_end[];
+
+extern u8 __smp_alt_begin[], __smp_alt_end[];
+
 /* Replace instructions with better alternatives for this CPU type.
    This runs before SMP is initialized to avoid SMP problems with
    self-modifying code. This implies that asymmetric systems where
@@ -99,6 +139,7 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
 {
        unsigned char **noptable = find_nop_table();
        struct alt_instr *a;
+       u8 *instr;
        int diff, i, k;
 
        DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end);
@@ -106,7 +147,16 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
                BUG_ON(a->replacementlen > a->instrlen);
                if (!boot_cpu_has(a->cpuid))
                        continue;
-               memcpy(a->instr, a->replacement, a->replacementlen);
+               instr = a->instr;
+#ifdef CONFIG_X86_64
+               /* vsyscall code is not mapped yet. resolve it manually. */
+               if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) {
+                       instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0));
+                       DPRINTK("%s: vsyscall fixup: %p => %p\n",
+                               __FUNCTION__, a->instr, instr);
+               }
+#endif
+               memcpy(instr, a->replacement, a->replacementlen);
                diff = a->instrlen - a->replacementlen;
                /* Pad the rest with nops */
                for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
@@ -186,14 +236,6 @@ struct smp_alt_module {
 static LIST_HEAD(smp_alt_modules);
 static DEFINE_SPINLOCK(smp_alt);
 
-static int smp_alt_once = 0;
-static int __init bootonly(char *str)
-{
-       smp_alt_once = 1;
-       return 1;
-}
-__setup("smp-alt-boot", bootonly);
-
 void alternatives_smp_module_add(struct module *mod, char *name,
                                 void *locks, void *locks_end,
                                 void *text,  void *text_end)
@@ -201,6 +243,9 @@ void alternatives_smp_module_add(struct module *mod, char *name,
        struct smp_alt_module *smp;
        unsigned long flags;
 
+       if (no_replacement)
+               return;
+
        if (smp_alt_once) {
                if (boot_cpu_has(X86_FEATURE_UP))
                        alternatives_smp_unlock(locks, locks_end,
@@ -235,7 +280,7 @@ void alternatives_smp_module_del(struct module *mod)
        struct smp_alt_module *item;
        unsigned long flags;
 
-       if (smp_alt_once)
+       if (no_replacement || smp_alt_once)
                return;
 
        spin_lock_irqsave(&smp_alt, flags);
@@ -256,7 +301,7 @@ void alternatives_smp_switch(int smp)
        struct smp_alt_module *mod;
        unsigned long flags;
 
-       if (smp_alt_once)
+       if (no_replacement || smp_alt_once)
                return;
        BUG_ON(!smp && (num_online_cpus() > 1));
 
@@ -285,6 +330,13 @@ void alternatives_smp_switch(int smp)
 
 void __init alternative_instructions(void)
 {
+       if (no_replacement) {
+               printk(KERN_INFO "(SMP-)alternatives turned off\n");
+               free_init_pages("SMP alternatives",
+                               (unsigned long)__smp_alt_begin,
+                               (unsigned long)__smp_alt_end);
+               return;
+       }
        apply_alternatives(__alt_instructions, __alt_instructions_end);
 
        /* switch to patch-once-at-boottime-only mode and free the
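 
The padding loop at the end of apply_alternatives() fills the leftover bytes
with nops, at most ASM_NOP_MAX at a time, taken from the per-vendor tables
above. A minimal userspace sketch of that strategy (the kernel's tables are
fully populated up to ASM_NOP_MAX; the fallback to shorter entries below only
compensates for this truncated toy table):

    #include <stdio.h>
    #include <string.h>

    #define ASM_NOP_MAX 8

    /* toy nop table: entry i is an i-byte nop sequence, index 0 unused */
    static unsigned char nop1[] = { 0x90 };
    static unsigned char nop2[] = { 0x66, 0x90 };
    static unsigned char *nops[ASM_NOP_MAX + 1] = { NULL, nop1, nop2 };

    /* pad 'diff' bytes at 'dst' with nops, mirroring the loop at the
     * end of apply_alternatives() */
    static void pad_with_nops(unsigned char *dst, int diff)
    {
        while (diff > 0) {
            int k = diff > ASM_NOP_MAX ? ASM_NOP_MAX : diff;
            while (nops[k] == NULL)     /* toy table is truncated */
                k--;
            memcpy(dst, nops[k], k);
            dst += k;
            diff -= k;
        }
    }

    int main(void)
    {
        unsigned char buf[5];
        int i;

        pad_with_nops(buf, (int)sizeof(buf));
        for (i = 0; i < (int)sizeof(buf); i++)
            printf("%02x ", buf[i]);
        printf("\n");
        return 0;
    }
 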
index 5ab59c12335bef0fbab075e307184ee00c58f443..7ce09492fc0ca0bee5fdac224a38d4e509df6c20 100644 (file)
@@ -36,6 +36,7 @@
 #include <asm/arch_hooks.h>
 #include <asm/hpet.h>
 #include <asm/i8253.h>
+#include <asm/nmi.h>
 
 #include <mach_apic.h>
 #include <mach_apicdef.h>
@@ -156,7 +157,7 @@ void clear_local_APIC(void)
        maxlvt = get_maxlvt();
 
        /*
-        * Masking an LVT entry on a P6 can trigger a local APIC error
+        * Masking an LVT entry can trigger a local APIC error
         * if the vector is zero. Mask LVTERR first to prevent this.
         */
        if (maxlvt >= 3) {
@@ -1117,7 +1118,18 @@ void disable_APIC_timer(void)
                unsigned long v;
 
                v = apic_read(APIC_LVTT);
-               apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED);
+               /*
+                * When an illegal vector value (0-15) is written to an LVT
+                * entry and delivery mode is Fixed, the APIC may signal an
+                * illegal vector error, without regard to whether the mask
+                * bit is set or whether an interrupt is actually seen on input.
+                *
+                * The boot sequence might call this function when the LVTT
+                * still has a '0' vector value, so make sure the vector
+                * field is set to a valid value.
+                */
+               v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
+               apic_write_around(APIC_LVTT, v);
        }
 }
 
index 9e819eb68229776b937ce35760bc00de2d28d69e..7c5729d1fd06ebb00a92685a296686cc615dc489 100644 (file)
@@ -764,9 +764,9 @@ static int apm_do_idle(void)
        int     idled = 0;
        int     polling;
 
-       polling = test_thread_flag(TIF_POLLING_NRFLAG);
+       polling = !!(current_thread_info()->status & TS_POLLING);
        if (polling) {
-               clear_thread_flag(TIF_POLLING_NRFLAG);
+               current_thread_info()->status &= ~TS_POLLING;
                smp_mb__after_clear_bit();
        }
        if (!need_resched()) {
@@ -774,7 +774,7 @@ static int apm_do_idle(void)
                ret = apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax);
        }
        if (polling)
-               set_thread_flag(TIF_POLLING_NRFLAG);
+               current_thread_info()->status |= TS_POLLING;
 
        if (!idled)
                return 0;
index 786d1a57048b9156396a7f7d5739306097420c60..fd0457c9c827f406e9e7f35d490bf7c0c648fe01 100644 (file)
@@ -224,15 +224,17 @@ static void __init init_amd(struct cpuinfo_x86 *c)
 
 #ifdef CONFIG_X86_HT
        /*
-        * On a AMD dual core setup the lower bits of the APIC id
-        * distingush the cores.  Assumes number of cores is a power
-        * of two.
+        * On an AMD multi-core setup the lower bits of the APIC id
+        * distinguish the cores.
         */
        if (c->x86_max_cores > 1) {
                int cpu = smp_processor_id();
-               unsigned bits = 0;
-               while ((1 << bits) < c->x86_max_cores)
-                       bits++;
+               unsigned bits = (cpuid_ecx(0x80000008) >> 12) & 0xf;
+
+               if (bits == 0) {
+                       while ((1 << bits) < c->x86_max_cores)
+                               bits++;
+               }
                cpu_core_id[cpu] = phys_proc_id[cpu] & ((1<<bits)-1);
                phys_proc_id[cpu] >>= bits;
                printk(KERN_INFO "CPU %d(%d) -> Core %d\n",
@@ -240,6 +242,8 @@ static void __init init_amd(struct cpuinfo_x86 *c)
        }
 #endif
 
+       if (cpuid_eax(0x80000000) >= 0x80000006)
+               num_cache_leaves = 3;
 }
 
 static unsigned int amd_size_cache(struct cpuinfo_x86 * c, unsigned int size)
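 
The new code prefers the ApicIdCoreIdSize field (bits 15:12 of ECX from CPUID
leaf 0x80000008) over assuming a power-of-two core count. A hedged userspace
sketch of reading that leaf with GCC's <cpuid.h> helper (field positions per
the AMD CPUID documentation; verify against the current spec):

    #include <stdio.h>
    #include <cpuid.h>

    int main(void)
    {
        unsigned eax, ebx, ecx, edx;

        if (!__get_cpuid(0x80000008, &eax, &ebx, &ecx, &edx))
            return 1;    /* leaf not supported on this CPU */

        /* ECX bits 15:12: APIC id bits reserved for core ids ("bits" in
         * init_amd() above); zero on older parts, where the kernel falls
         * back to deriving it from the core count */
        unsigned bits  = (ecx >> 12) & 0xf;
        unsigned cores = (ecx & 0xff) + 1;   /* ECX bits 7:0: cores - 1 */

        printf("core id bits: %u, cores per package: %u\n", bits, cores);
        return 0;
    }
 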
index 5386b29bb5a51a6094b19cb3e3e531624827d5ec..10afc645c540c844576493370ce64d7fd8a8d1c8 100644 (file)
@@ -122,6 +122,12 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 
        select_idle_routine(c);
        l2 = init_intel_cacheinfo(c);
+       if (c->cpuid_level > 9 ) {
+               unsigned eax = cpuid_eax(10);
+               /* Check for version and the number of counters */
+               if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
+                       set_bit(X86_FEATURE_ARCH_PERFMON, c->x86_capability);
+       }
 
        /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */
        if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633)
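 
The check added to init_intel() decodes EAX of CPUID leaf 10 (0xA): bits 7:0
carry the architectural perfmon version and bits 15:8 the number of
general-purpose counters. A minimal userspace sketch of the same test:

    #include <stdio.h>
    #include <cpuid.h>

    int main(void)
    {
        unsigned eax, ebx, ecx, edx;

        if (!__get_cpuid(10, &eax, &ebx, &ecx, &edx))
            return 1;

        unsigned version  = eax & 0xff;          /* perfmon version id */
        unsigned counters = (eax >> 8) & 0xff;   /* general-purpose counters */

        /* same condition as init_intel() above */
        if (version && counters > 1)
            printf("arch perfmon v%u with %u counters\n", version, counters);
        else
            printf("no usable architectural perfmon\n");
        return 0;
    }
 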
index c8547a6fa7e6179f41d0c1be2771d34448b33711..6c37b4fd8ce285c293306788656b016e83cf3da3 100644 (file)
@@ -4,6 +4,7 @@
  *      Changes:
  *      Venkatesh Pallipadi    : Adding cache identification through cpuid(4)
  *             Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
+ *     Andi Kleen              : CPUID4 emulation on AMD.
  */
 
 #include <linux/init.h>
@@ -130,25 +131,111 @@ struct _cpuid4_info {
        cpumask_t shared_cpu_map;
 };
 
-static unsigned short                  num_cache_leaves;
+unsigned short                 num_cache_leaves;
+
+/* AMD doesn't have CPUID4. Emulate it here to report the same
+   information to the user.  This makes some assumptions about the machine:
+   No L3, L2 not shared, no SMT, etc., which is currently true on AMD CPUs.
+
+   In theory the TLBs could be reported as fake type (they are in "dummy").
+   Maybe later */
+union l1_cache {
+       struct {
+               unsigned line_size : 8;
+               unsigned lines_per_tag : 8;
+               unsigned assoc : 8;
+               unsigned size_in_kb : 8;
+       };
+       unsigned val;
+};
+
+union l2_cache {
+       struct {
+               unsigned line_size : 8;
+               unsigned lines_per_tag : 4;
+               unsigned assoc : 4;
+               unsigned size_in_kb : 16;
+       };
+       unsigned val;
+};
+
+static unsigned short assocs[] = {
+       [1] = 1, [2] = 2, [4] = 4, [6] = 8,
+       [8] = 16,
+       [0xf] = 0xffff // ??
+       };
+static unsigned char levels[] = { 1, 1, 2 };
+static unsigned char types[] = { 1, 2, 3 };
+
+static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
+                      union _cpuid4_leaf_ebx *ebx,
+                      union _cpuid4_leaf_ecx *ecx)
+{
+       unsigned dummy;
+       unsigned line_size, lines_per_tag, assoc, size_in_kb;
+       union l1_cache l1i, l1d;
+       union l2_cache l2;
+
+       eax->full = 0;
+       ebx->full = 0;
+       ecx->full = 0;
+
+       cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
+       cpuid(0x80000006, &dummy, &dummy, &l2.val, &dummy);
+
+       if (leaf > 2 || !l1d.val || !l1i.val || !l2.val)
+               return;
+
+       eax->split.is_self_initializing = 1;
+       eax->split.type = types[leaf];
+       eax->split.level = levels[leaf];
+       eax->split.num_threads_sharing = 0;
+       eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1;
+
+       if (leaf <= 1) {
+               union l1_cache *l1 = leaf == 0 ? &l1d : &l1i;
+               assoc = l1->assoc;
+               line_size = l1->line_size;
+               lines_per_tag = l1->lines_per_tag;
+               size_in_kb = l1->size_in_kb;
+       } else {
+               assoc = l2.assoc;
+               line_size = l2.line_size;
+               lines_per_tag = l2.lines_per_tag;
+               /* cpu_data has errata corrections for K7 applied */
+               size_in_kb = current_cpu_data.x86_cache_size;
+       }
+
+       if (assoc == 0xf)
+               eax->split.is_fully_associative = 1;
+       ebx->split.coherency_line_size = line_size - 1;
+       ebx->split.ways_of_associativity = assocs[assoc] - 1;
+       ebx->split.physical_line_partition = lines_per_tag - 1;
+       ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
+               (ebx->split.ways_of_associativity + 1) - 1;
+}
 
 static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
 {
-       unsigned int            eax, ebx, ecx, edx;
-       union _cpuid4_leaf_eax  cache_eax;
+       union _cpuid4_leaf_eax  eax;
+       union _cpuid4_leaf_ebx  ebx;
+       union _cpuid4_leaf_ecx  ecx;
+       unsigned                edx;
 
-       cpuid_count(4, index, &eax, &ebx, &ecx, &edx);
-       cache_eax.full = eax;
-       if (cache_eax.split.type == CACHE_TYPE_NULL)
+       if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+               amd_cpuid4(index, &eax, &ebx, &ecx);
+       else
+               cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full,  &edx);
+       if (eax.split.type == CACHE_TYPE_NULL)
                return -EIO; /* better error ? */
 
-       this_leaf->eax.full = eax;
-       this_leaf->ebx.full = ebx;
-       this_leaf->ecx.full = ecx;
-       this_leaf->size = (this_leaf->ecx.split.number_of_sets + 1) *
-               (this_leaf->ebx.split.coherency_line_size + 1) *
-               (this_leaf->ebx.split.physical_line_partition + 1) *
-               (this_leaf->ebx.split.ways_of_associativity + 1);
+       this_leaf->eax = eax;
+       this_leaf->ebx = ebx;
+       this_leaf->ecx = ecx;
+       this_leaf->size = (ecx.split.number_of_sets + 1) *
+               (ebx.split.coherency_line_size + 1) *
+               (ebx.split.physical_line_partition + 1) *
+               (ebx.split.ways_of_associativity + 1);
        return 0;
 }
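 
The emulation leans on bitfield unions to pick apart the packed CPUID
0x80000005/0x80000006 registers. A userspace sketch reusing the same l1_cache
layout (anonymous struct as in the kernel source; bitfield ordering is
compiler-specific, so this assumes GCC on x86, low bits first):

    #include <stdio.h>
    #include <cpuid.h>

    union l1_cache {
        struct {
            unsigned line_size     : 8;
            unsigned lines_per_tag : 8;
            unsigned assoc         : 8;
            unsigned size_in_kb    : 8;
        };
        unsigned val;
    };

    int main(void)
    {
        unsigned dummy;
        union l1_cache l1d, l1i;

        /* AMD leaf 0x80000005: ECX describes the L1 data cache,
         * EDX the L1 instruction cache */
        if (!__get_cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val))
            return 1;

        printf("L1D: %uK, %u-byte lines, assoc code %u\n",
               l1d.size_in_kb, l1d.line_size, l1d.assoc);
        printf("L1I: %uK, %u-byte lines, assoc code %u\n",
               l1i.size_in_kb, l1i.line_size, l1i.assoc);
        return 0;
    }
 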
 
index 21dc1bbb806722bf26bf5d4348d01d386b72d7ce..0c88d3ec8c18bb6d19cf6db11c38c0d72ac65685 100644 (file)
@@ -120,14 +120,9 @@ static int crash_nmi_callback(struct pt_regs *regs, int cpu)
        return 1;
 }
 
-/*
- * By using the NMI code instead of a vector we just sneak thru the
- * word generator coming out with just what we want.  AND it does
- * not matter if clustered_apic_mode is set or not.
- */
 static void smp_send_nmi_allbutself(void)
 {
-       send_IPI_allbutself(APIC_DM_NMI);
+       send_IPI_allbutself(NMI_VECTOR);
 }
 
 static void nmi_shootdown_cpus(void)
index cfc683f153b916b1d57e27769fd8f8de132bbb7f..e6e4506e749acbe079f9196fab1a05a6e333a753 100644 (file)
@@ -48,6 +48,7 @@
 #include <asm/smp.h>
 #include <asm/page.h>
 #include <asm/desc.h>
+#include <asm/dwarf2.h>
 #include "irq_vectors.h"
 
 #define nr_syscalls ((syscall_table_size)/4)
@@ -85,31 +86,67 @@ VM_MASK             = 0x00020000
 #define SAVE_ALL \
        cld; \
        pushl %es; \
+       CFI_ADJUST_CFA_OFFSET 4;\
+       /*CFI_REL_OFFSET es, 0;*/\
        pushl %ds; \
+       CFI_ADJUST_CFA_OFFSET 4;\
+       /*CFI_REL_OFFSET ds, 0;*/\
        pushl %eax; \
+       CFI_ADJUST_CFA_OFFSET 4;\
+       CFI_REL_OFFSET eax, 0;\
        pushl %ebp; \
+       CFI_ADJUST_CFA_OFFSET 4;\
+       CFI_REL_OFFSET ebp, 0;\
        pushl %edi; \
+       CFI_ADJUST_CFA_OFFSET 4;\
+       CFI_REL_OFFSET edi, 0;\
        pushl %esi; \
+       CFI_ADJUST_CFA_OFFSET 4;\
+       CFI_REL_OFFSET esi, 0;\
        pushl %edx; \
+       CFI_ADJUST_CFA_OFFSET 4;\
+       CFI_REL_OFFSET edx, 0;\
        pushl %ecx; \
+       CFI_ADJUST_CFA_OFFSET 4;\
+       CFI_REL_OFFSET ecx, 0;\
        pushl %ebx; \
+       CFI_ADJUST_CFA_OFFSET 4;\
+       CFI_REL_OFFSET ebx, 0;\
        movl $(__USER_DS), %edx; \
        movl %edx, %ds; \
        movl %edx, %es;
 
 #define RESTORE_INT_REGS \
        popl %ebx;      \
+       CFI_ADJUST_CFA_OFFSET -4;\
+       CFI_RESTORE ebx;\
        popl %ecx;      \
+       CFI_ADJUST_CFA_OFFSET -4;\
+       CFI_RESTORE ecx;\
        popl %edx;      \
+       CFI_ADJUST_CFA_OFFSET -4;\
+       CFI_RESTORE edx;\
        popl %esi;      \
+       CFI_ADJUST_CFA_OFFSET -4;\
+       CFI_RESTORE esi;\
        popl %edi;      \
+       CFI_ADJUST_CFA_OFFSET -4;\
+       CFI_RESTORE edi;\
        popl %ebp;      \
-       popl %eax
+       CFI_ADJUST_CFA_OFFSET -4;\
+       CFI_RESTORE ebp;\
+       popl %eax;      \
+       CFI_ADJUST_CFA_OFFSET -4;\
+       CFI_RESTORE eax
 
 #define RESTORE_REGS   \
        RESTORE_INT_REGS; \
 1:     popl %ds;       \
+       CFI_ADJUST_CFA_OFFSET -4;\
+       /*CFI_RESTORE ds;*/\
 2:     popl %es;       \
+       CFI_ADJUST_CFA_OFFSET -4;\
+       /*CFI_RESTORE es;*/\
 .section .fixup,"ax";  \
 3:     movl $0,(%esp); \
        jmp 1b;         \
@@ -122,13 +159,43 @@ VM_MASK           = 0x00020000
        .long 2b,4b;    \
 .previous
 
+#define RING0_INT_FRAME \
+       CFI_STARTPROC simple;\
+       CFI_DEF_CFA esp, 3*4;\
+       /*CFI_OFFSET cs, -2*4;*/\
+       CFI_OFFSET eip, -3*4
+
+#define RING0_EC_FRAME \
+       CFI_STARTPROC simple;\
+       CFI_DEF_CFA esp, 4*4;\
+       /*CFI_OFFSET cs, -2*4;*/\
+       CFI_OFFSET eip, -3*4
+
+#define RING0_PTREGS_FRAME \
+       CFI_STARTPROC simple;\
+       CFI_DEF_CFA esp, OLDESP-EBX;\
+       /*CFI_OFFSET cs, CS-OLDESP;*/\
+       CFI_OFFSET eip, EIP-OLDESP;\
+       /*CFI_OFFSET es, ES-OLDESP;*/\
+       /*CFI_OFFSET ds, DS-OLDESP;*/\
+       CFI_OFFSET eax, EAX-OLDESP;\
+       CFI_OFFSET ebp, EBP-OLDESP;\
+       CFI_OFFSET edi, EDI-OLDESP;\
+       CFI_OFFSET esi, ESI-OLDESP;\
+       CFI_OFFSET edx, EDX-OLDESP;\
+       CFI_OFFSET ecx, ECX-OLDESP;\
+       CFI_OFFSET ebx, EBX-OLDESP
 
 ENTRY(ret_from_fork)
+       CFI_STARTPROC
        pushl %eax
+       CFI_ADJUST_CFA_OFFSET 4
        call schedule_tail
        GET_THREAD_INFO(%ebp)
        popl %eax
+       CFI_ADJUST_CFA_OFFSET -4
        jmp syscall_exit
+       CFI_ENDPROC
 
 /*
  * Return to user mode is not as complex as all this looks,
@@ -139,6 +206,7 @@ ENTRY(ret_from_fork)
 
        # userspace resumption stub bypassing syscall exit tracing
        ALIGN
+       RING0_PTREGS_FRAME
 ret_from_exception:
        preempt_stop
 ret_from_intr:
@@ -171,20 +239,33 @@ need_resched:
        call preempt_schedule_irq
        jmp need_resched
 #endif
+       CFI_ENDPROC
 
 /* SYSENTER_RETURN points to after the "sysenter" instruction in
    the vsyscall page.  See vsyscall-sysentry.S, which defines the symbol.  */
 
        # sysenter call handler stub
 ENTRY(sysenter_entry)
+       CFI_STARTPROC simple
+       CFI_DEF_CFA esp, 0
+       CFI_REGISTER esp, ebp
        movl TSS_sysenter_esp0(%esp),%esp
 sysenter_past_esp:
        sti
        pushl $(__USER_DS)
+       CFI_ADJUST_CFA_OFFSET 4
+       /*CFI_REL_OFFSET ss, 0*/
        pushl %ebp
+       CFI_ADJUST_CFA_OFFSET 4
+       CFI_REL_OFFSET esp, 0
        pushfl
+       CFI_ADJUST_CFA_OFFSET 4
        pushl $(__USER_CS)
+       CFI_ADJUST_CFA_OFFSET 4
+       /*CFI_REL_OFFSET cs, 0*/
        pushl $SYSENTER_RETURN
+       CFI_ADJUST_CFA_OFFSET 4
+       CFI_REL_OFFSET eip, 0
 
 /*
  * Load the potential sixth argument from user stack.
@@ -199,6 +280,7 @@ sysenter_past_esp:
 .previous
 
        pushl %eax
+       CFI_ADJUST_CFA_OFFSET 4
        SAVE_ALL
        GET_THREAD_INFO(%ebp)
 
@@ -219,11 +301,14 @@ sysenter_past_esp:
        xorl %ebp,%ebp
        sti
        sysexit
+       CFI_ENDPROC
 
 
        # system call handler stub
 ENTRY(system_call)
+       RING0_INT_FRAME                 # can't unwind into user space anyway
        pushl %eax                      # save orig_eax
+       CFI_ADJUST_CFA_OFFSET 4
        SAVE_ALL
        GET_THREAD_INFO(%ebp)
        testl $TF_MASK,EFLAGS(%esp)
@@ -256,10 +341,12 @@ restore_all:
        movb CS(%esp), %al
        andl $(VM_MASK | (4 << 8) | 3), %eax
        cmpl $((4 << 8) | 3), %eax
+       CFI_REMEMBER_STATE
        je ldt_ss                       # returning to user-space with LDT SS
 restore_nocheck:
        RESTORE_REGS
        addl $4, %esp
+       CFI_ADJUST_CFA_OFFSET -4
 1:     iret
 .section .fixup,"ax"
 iret_exc:
@@ -273,6 +360,7 @@ iret_exc:
        .long 1b,iret_exc
 .previous
 
+       CFI_RESTORE_STATE
 ldt_ss:
        larl OLDSS(%esp), %eax
        jnz restore_nocheck
@@ -285,11 +373,13 @@ ldt_ss:
         * CPUs, which we can try to work around to make
         * dosemu and wine happy. */
        subl $8, %esp           # reserve space for switch16 pointer
+       CFI_ADJUST_CFA_OFFSET 8
        cli
        movl %esp, %eax
        /* Set up the 16bit stack frame with switch32 pointer on top,
         * and a switch16 pointer on top of the current frame. */
        call setup_x86_bogus_stack
+       CFI_ADJUST_CFA_OFFSET -8        # frame has moved
        RESTORE_REGS
        lss 20+4(%esp), %esp    # switch to 16bit stack
 1:     iret
@@ -297,9 +387,11 @@ ldt_ss:
        .align 4
        .long 1b,iret_exc
 .previous
+       CFI_ENDPROC
 
        # perform work that needs to be done immediately before resumption
        ALIGN
+       RING0_PTREGS_FRAME              # can't unwind into user space anyway
 work_pending:
        testb $_TIF_NEED_RESCHED, %cl
        jz work_notifysig
@@ -329,8 +421,10 @@ work_notifysig:                            # deal with pending signals and
 work_notifysig_v86:
 #ifdef CONFIG_VM86
        pushl %ecx                      # save ti_flags for do_notify_resume
+       CFI_ADJUST_CFA_OFFSET 4
        call save_v86_state             # %eax contains pt_regs pointer
        popl %ecx
+       CFI_ADJUST_CFA_OFFSET -4
        movl %eax, %esp
        xorl %edx, %edx
        call do_notify_resume
@@ -363,19 +457,21 @@ syscall_exit_work:
        movl $1, %edx
        call do_syscall_trace
        jmp resume_userspace
+       CFI_ENDPROC
 
-       ALIGN
+       RING0_INT_FRAME                 # can't unwind into user space anyway
 syscall_fault:
        pushl %eax                      # save orig_eax
+       CFI_ADJUST_CFA_OFFSET 4
        SAVE_ALL
        GET_THREAD_INFO(%ebp)
        movl $-EFAULT,EAX(%esp)
        jmp resume_userspace
 
-       ALIGN
 syscall_badsys:
        movl $-ENOSYS,EAX(%esp)
        jmp resume_userspace
+       CFI_ENDPROC
 
 #define FIXUP_ESPFIX_STACK \
        movl %esp, %eax; \
@@ -387,16 +483,21 @@ syscall_badsys:
        movl %eax, %esp;
 #define UNWIND_ESPFIX_STACK \
        pushl %eax; \
+       CFI_ADJUST_CFA_OFFSET 4; \
        movl %ss, %eax; \
        /* see if on 16bit stack */ \
        cmpw $__ESPFIX_SS, %ax; \
-       jne 28f; \
-       movl $__KERNEL_DS, %edx; \
-       movl %edx, %ds; \
-       movl %edx, %es; \
+       je 28f; \
+27:    popl %eax; \
+       CFI_ADJUST_CFA_OFFSET -4; \
+.section .fixup,"ax"; \
+28:    movl $__KERNEL_DS, %eax; \
+       movl %eax, %ds; \
+       movl %eax, %es; \
        /* switch to 32bit stack */ \
-       FIXUP_ESPFIX_STACK \
-28:    popl %eax;
+       FIXUP_ESPFIX_STACK; \
+       jmp 27b; \
+.previous
 
 /*
  * Build the entry stubs and pointer table with
@@ -408,9 +509,14 @@ ENTRY(interrupt)
 
 vector=0
 ENTRY(irq_entries_start)
+       RING0_INT_FRAME
 .rept NR_IRQS
        ALIGN
+ .if vector
+       CFI_ADJUST_CFA_OFFSET -4
+ .endif
 1:     pushl $vector-256
+       CFI_ADJUST_CFA_OFFSET 4
        jmp common_interrupt
 .data
        .long 1b
@@ -424,60 +530,99 @@ common_interrupt:
        movl %esp,%eax
        call do_IRQ
        jmp ret_from_intr
+       CFI_ENDPROC
 
 #define BUILD_INTERRUPT(name, nr)      \
 ENTRY(name)                            \
+       RING0_INT_FRAME;                \
        pushl $nr-256;                  \
-       SAVE_ALL                        \
+       CFI_ADJUST_CFA_OFFSET 4;        \
+       SAVE_ALL;                       \
        movl %esp,%eax;                 \
        call smp_/**/name;              \
-       jmp ret_from_intr;
+       jmp ret_from_intr;      \
+       CFI_ENDPROC
 
 /* The include is where all of the SMP etc. interrupts come from */
 #include "entry_arch.h"
 
 ENTRY(divide_error)
+       RING0_INT_FRAME
        pushl $0                        # no error code
+       CFI_ADJUST_CFA_OFFSET 4
        pushl $do_divide_error
+       CFI_ADJUST_CFA_OFFSET 4
        ALIGN
 error_code:
        pushl %ds
+       CFI_ADJUST_CFA_OFFSET 4
+       /*CFI_REL_OFFSET ds, 0*/
        pushl %eax
+       CFI_ADJUST_CFA_OFFSET 4
+       CFI_REL_OFFSET eax, 0
        xorl %eax, %eax
        pushl %ebp
+       CFI_ADJUST_CFA_OFFSET 4
+       CFI_REL_OFFSET ebp, 0
        pushl %edi
+       CFI_ADJUST_CFA_OFFSET 4
+       CFI_REL_OFFSET edi, 0
        pushl %esi
+       CFI_ADJUST_CFA_OFFSET 4
+       CFI_REL_OFFSET esi, 0
        pushl %edx
+       CFI_ADJUST_CFA_OFFSET 4
+       CFI_REL_OFFSET edx, 0
        decl %eax                       # eax = -1
        pushl %ecx
+       CFI_ADJUST_CFA_OFFSET 4
+       CFI_REL_OFFSET ecx, 0
        pushl %ebx
+       CFI_ADJUST_CFA_OFFSET 4
+       CFI_REL_OFFSET ebx, 0
        cld
        pushl %es
+       CFI_ADJUST_CFA_OFFSET 4
+       /*CFI_REL_OFFSET es, 0*/
        UNWIND_ESPFIX_STACK
        popl %ecx
+       CFI_ADJUST_CFA_OFFSET -4
+       /*CFI_REGISTER es, ecx*/
        movl ES(%esp), %edi             # get the function address
        movl ORIG_EAX(%esp), %edx       # get the error code
        movl %eax, ORIG_EAX(%esp)
        movl %ecx, ES(%esp)
+       /*CFI_REL_OFFSET es, ES*/
        movl $(__USER_DS), %ecx
        movl %ecx, %ds
        movl %ecx, %es
        movl %esp,%eax                  # pt_regs pointer
        call *%edi
        jmp ret_from_exception
+       CFI_ENDPROC
 
 ENTRY(coprocessor_error)
+       RING0_INT_FRAME
        pushl $0
+       CFI_ADJUST_CFA_OFFSET 4
        pushl $do_coprocessor_error
+       CFI_ADJUST_CFA_OFFSET 4
        jmp error_code
+       CFI_ENDPROC
 
 ENTRY(simd_coprocessor_error)
+       RING0_INT_FRAME
        pushl $0
+       CFI_ADJUST_CFA_OFFSET 4
        pushl $do_simd_coprocessor_error
+       CFI_ADJUST_CFA_OFFSET 4
        jmp error_code
+       CFI_ENDPROC
 
 ENTRY(device_not_available)
+       RING0_INT_FRAME
        pushl $-1                       # mark this as an int
+       CFI_ADJUST_CFA_OFFSET 4
        SAVE_ALL
        movl %cr0, %eax
        testl $0x4, %eax                # EM (math emulation bit)
@@ -487,9 +632,12 @@ ENTRY(device_not_available)
        jmp ret_from_exception
 device_not_available_emulate:
        pushl $0                        # temporary storage for ORIG_EIP
+       CFI_ADJUST_CFA_OFFSET 4
        call math_emulate
        addl $4, %esp
+       CFI_ADJUST_CFA_OFFSET -4
        jmp ret_from_exception
+       CFI_ENDPROC
 
 /*
  * Debug traps and NMI can happen at the one SYSENTER instruction
@@ -514,16 +662,19 @@ label:                                            \
        pushl $sysenter_past_esp
 
 KPROBE_ENTRY(debug)
+       RING0_INT_FRAME
        cmpl $sysenter_entry,(%esp)
        jne debug_stack_correct
        FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
 debug_stack_correct:
        pushl $-1                       # mark this as an int
+       CFI_ADJUST_CFA_OFFSET 4
        SAVE_ALL
        xorl %edx,%edx                  # error code 0
        movl %esp,%eax                  # pt_regs pointer
        call do_debug
        jmp ret_from_exception
+       CFI_ENDPROC
        .previous .text
 /*
  * NMI is doubly nasty. It can happen _while_ we're handling
@@ -534,14 +685,18 @@ debug_stack_correct:
  * fault happened on the sysenter path.
  */
 ENTRY(nmi)
+       RING0_INT_FRAME
        pushl %eax
+       CFI_ADJUST_CFA_OFFSET 4
        movl %ss, %eax
        cmpw $__ESPFIX_SS, %ax
        popl %eax
+       CFI_ADJUST_CFA_OFFSET -4
        je nmi_16bit_stack
        cmpl $sysenter_entry,(%esp)
        je nmi_stack_fixup
        pushl %eax
+       CFI_ADJUST_CFA_OFFSET 4
        movl %esp,%eax
        /* Do not access memory above the end of our stack page,
         * it might not exist.
@@ -549,16 +704,19 @@ ENTRY(nmi)
        andl $(THREAD_SIZE-1),%eax
        cmpl $(THREAD_SIZE-20),%eax
        popl %eax
+       CFI_ADJUST_CFA_OFFSET -4
        jae nmi_stack_correct
        cmpl $sysenter_entry,12(%esp)
        je nmi_debug_stack_check
 nmi_stack_correct:
        pushl %eax
+       CFI_ADJUST_CFA_OFFSET 4
        SAVE_ALL
        xorl %edx,%edx          # zero error code
        movl %esp,%eax          # pt_regs pointer
        call do_nmi
        jmp restore_all
+       CFI_ENDPROC
 
 nmi_stack_fixup:
        FIX_STACK(12,nmi_stack_correct, 1)
@@ -574,94 +732,177 @@ nmi_debug_stack_check:
        jmp nmi_stack_correct
 
 nmi_16bit_stack:
+       RING0_INT_FRAME
        /* create the pointer to lss back */
        pushl %ss
+       CFI_ADJUST_CFA_OFFSET 4
        pushl %esp
+       CFI_ADJUST_CFA_OFFSET 4
        movzwl %sp, %esp
        addw $4, (%esp)
        /* copy the iret frame of 12 bytes */
        .rept 3
        pushl 16(%esp)
+       CFI_ADJUST_CFA_OFFSET 4
        .endr
        pushl %eax
+       CFI_ADJUST_CFA_OFFSET 4
        SAVE_ALL
        FIXUP_ESPFIX_STACK              # %eax == %esp
+       CFI_ADJUST_CFA_OFFSET -20       # the frame has now moved
        xorl %edx,%edx                  # zero error code
        call do_nmi
        RESTORE_REGS
        lss 12+4(%esp), %esp            # back to 16bit stack
 1:     iret
+       CFI_ENDPROC
 .section __ex_table,"a"
        .align 4
        .long 1b,iret_exc
 .previous
 
 KPROBE_ENTRY(int3)
+       RING0_INT_FRAME
        pushl $-1                       # mark this as an int
+       CFI_ADJUST_CFA_OFFSET 4
        SAVE_ALL
        xorl %edx,%edx          # zero error code
        movl %esp,%eax          # pt_regs pointer
        call do_int3
        jmp ret_from_exception
+       CFI_ENDPROC
        .previous .text
 
 ENTRY(overflow)
+       RING0_INT_FRAME
        pushl $0
+       CFI_ADJUST_CFA_OFFSET 4
        pushl $do_overflow
+       CFI_ADJUST_CFA_OFFSET 4
        jmp error_code
+       CFI_ENDPROC
 
 ENTRY(bounds)
+       RING0_INT_FRAME
        pushl $0
+       CFI_ADJUST_CFA_OFFSET 4
        pushl $do_bounds
+       CFI_ADJUST_CFA_OFFSET 4
        jmp error_code
+       CFI_ENDPROC
 
 ENTRY(invalid_op)
+       RING0_INT_FRAME
        pushl $0
+       CFI_ADJUST_CFA_OFFSET 4
        pushl $do_invalid_op
+       CFI_ADJUST_CFA_OFFSET 4
        jmp error_code
+       CFI_ENDPROC
 
 ENTRY(coprocessor_segment_overrun)
+       RING0_INT_FRAME
        pushl $0
+       CFI_ADJUST_CFA_OFFSET 4
        pushl $do_coprocessor_segment_overrun
+       CFI_ADJUST_CFA_OFFSET 4
        jmp error_code
+       CFI_ENDPROC
 
 ENTRY(invalid_TSS)
+       RING0_EC_FRAME
        pushl $do_invalid_TSS
+       CFI_ADJUST_CFA_OFFSET 4
        jmp error_code
+       CFI_ENDPROC
 
 ENTRY(segment_not_present)
+       RING0_EC_FRAME
        pushl $do_segment_not_present
+       CFI_ADJUST_CFA_OFFSET 4
        jmp error_code
+       CFI_ENDPROC
 
 ENTRY(stack_segment)
+       RING0_EC_FRAME
        pushl $do_stack_segment
+       CFI_ADJUST_CFA_OFFSET 4
        jmp error_code
+       CFI_ENDPROC
 
 KPROBE_ENTRY(general_protection)
+       RING0_EC_FRAME
        pushl $do_general_protection
+       CFI_ADJUST_CFA_OFFSET 4
        jmp error_code
+       CFI_ENDPROC
        .previous .text
 
 ENTRY(alignment_check)
+       RING0_EC_FRAME
        pushl $do_alignment_check
+       CFI_ADJUST_CFA_OFFSET 4
        jmp error_code
+       CFI_ENDPROC
 
 KPROBE_ENTRY(page_fault)
+       RING0_EC_FRAME
        pushl $do_page_fault
+       CFI_ADJUST_CFA_OFFSET 4
        jmp error_code
+       CFI_ENDPROC
        .previous .text
 
 #ifdef CONFIG_X86_MCE
 ENTRY(machine_check)
+       RING0_INT_FRAME
        pushl $0
+       CFI_ADJUST_CFA_OFFSET 4
        pushl machine_check_vector
+       CFI_ADJUST_CFA_OFFSET 4
        jmp error_code
+       CFI_ENDPROC
 #endif
 
 ENTRY(spurious_interrupt_bug)
+       RING0_INT_FRAME
        pushl $0
+       CFI_ADJUST_CFA_OFFSET 4
        pushl $do_spurious_interrupt_bug
+       CFI_ADJUST_CFA_OFFSET 4
        jmp error_code
+       CFI_ENDPROC
+
+#ifdef CONFIG_STACK_UNWIND
+ENTRY(arch_unwind_init_running)
+       CFI_STARTPROC
+       movl    4(%esp), %edx
+       movl    (%esp), %ecx
+       leal    4(%esp), %eax
+       movl    %ebx, EBX(%edx)
+       xorl    %ebx, %ebx
+       movl    %ebx, ECX(%edx)
+       movl    %ebx, EDX(%edx)
+       movl    %esi, ESI(%edx)
+       movl    %edi, EDI(%edx)
+       movl    %ebp, EBP(%edx)
+       movl    %ebx, EAX(%edx)
+       movl    $__USER_DS, DS(%edx)
+       movl    $__USER_DS, ES(%edx)
+       movl    %ebx, ORIG_EAX(%edx)
+       movl    %ecx, EIP(%edx)
+       movl    12(%esp), %ecx
+       movl    $__KERNEL_CS, CS(%edx)
+       movl    %ebx, EFLAGS(%edx)
+       movl    %eax, OLDESP(%edx)
+       movl    8(%esp), %eax
+       movl    %ecx, 8(%esp)
+       movl    EBX(%edx), %ebx
+       movl    $__KERNEL_DS, OLDSS(%edx)
+       jmpl    *%eax
+       CFI_ENDPROC
+ENDPROC(arch_unwind_init_running)
+#endif
 
 .section .rodata,"a"
 #include "syscall_table.S"
index a62df3e764c51c4b24e60287fb9b3de8634faa20..72ae414e4d4976437fefa8a01e1b38d97fdc0395 100644 (file)
@@ -38,6 +38,7 @@
 #include <asm/desc.h>
 #include <asm/timer.h>
 #include <asm/i8259.h>
+#include <asm/nmi.h>
 
 #include <mach_apic.h>
 
@@ -50,6 +51,7 @@ atomic_t irq_mis_count;
 static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
 
 static DEFINE_SPINLOCK(ioapic_lock);
+static DEFINE_SPINLOCK(vector_lock);
 
 int timer_over_8254 __initdata = 1;
 
@@ -1161,10 +1163,17 @@ u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 };
 int assign_irq_vector(int irq)
 {
        static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
+       unsigned long flags;
+       int vector;
+
+       BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
 
-       BUG_ON(irq >= NR_IRQ_VECTORS);
-       if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0)
+       spin_lock_irqsave(&vector_lock, flags);
+
+       if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) {
+               spin_unlock_irqrestore(&vector_lock, flags);
                return IO_APIC_VECTOR(irq);
+       }
 next:
        current_vector += 8;
        if (current_vector == SYSCALL_VECTOR)
@@ -1172,16 +1181,21 @@ next:
 
        if (current_vector >= FIRST_SYSTEM_VECTOR) {
                offset++;
-               if (!(offset%8))
+               if (!(offset%8)) {
+                       spin_unlock_irqrestore(&vector_lock, flags);
                        return -ENOSPC;
+               }
                current_vector = FIRST_DEVICE_VECTOR + offset;
        }
 
-       vector_irq[current_vector] = irq;
+       vector = current_vector;
+       vector_irq[vector] = irq;
        if (irq != AUTO_ASSIGN)
-               IO_APIC_VECTOR(irq) = current_vector;
+               IO_APIC_VECTOR(irq) = vector;
 
-       return current_vector;
+       spin_unlock_irqrestore(&vector_lock, flags);
+
+       return vector;
 }
 
 static struct hw_interrupt_type ioapic_level_type;
@@ -1193,21 +1207,14 @@ static struct hw_interrupt_type ioapic_edge_type;
 
 static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger)
 {
-       if (use_pci_vector() && !platform_legacy_irq(irq)) {
-               if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
-                               trigger == IOAPIC_LEVEL)
-                       irq_desc[vector].handler = &ioapic_level_type;
-               else
-                       irq_desc[vector].handler = &ioapic_edge_type;
-               set_intr_gate(vector, interrupt[vector]);
-       } else  {
-               if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
-                               trigger == IOAPIC_LEVEL)
-                       irq_desc[irq].handler = &ioapic_level_type;
-               else
-                       irq_desc[irq].handler = &ioapic_edge_type;
-               set_intr_gate(vector, interrupt[irq]);
-       }
+       unsigned idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq;
+
+       if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
+                       trigger == IOAPIC_LEVEL)
+               irq_desc[idx].handler = &ioapic_level_type;
+       else
+               irq_desc[idx].handler = &ioapic_edge_type;
+       set_intr_gate(vector, interrupt[idx]);
 }
 
 static void __init setup_IO_APIC_irqs(void)
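 
The race being fixed: two CPUs could enter assign_irq_vector() concurrently
and advance current_vector at the same time, handing out the same vector
twice. A toy userspace analogue of the locking pattern, with a pthread mutex
standing in for the new vector_lock (names and the 0x31 start value are
illustrative):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t vector_lock = PTHREAD_MUTEX_INITIALIZER;
    static int current_vector = 0x31;   /* stand-in for FIRST_DEVICE_VECTOR */

    static int assign_vector(void)
    {
        pthread_mutex_lock(&vector_lock);
        current_vector += 8;            /* same stride as the kernel */
        int vector = current_vector;
        pthread_mutex_unlock(&vector_lock);
        return vector;   /* local copy taken under the lock, as in the patch */
    }

    int main(void)
    {
        printf("vectors: 0x%x 0x%x\n", assign_vector(), assign_vector());
        return 0;
    }
 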
index 49ce4c31b713fc56a690d701389bc047e72a5a50..061533e0cb5e8efa284258c3619665a287a786e5 100644 (file)
@@ -227,7 +227,7 @@ int show_interrupts(struct seq_file *p, void *v)
        if (i == 0) {
                seq_printf(p, "           ");
                for_each_online_cpu(j)
-                       seq_printf(p, "CPU%d       ",j);
+                       seq_printf(p, "CPU%-8d",j);
                seq_putc(p, '\n');
        }
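 
The heading change swaps a fixed run of trailing spaces for a printf field
width: "CPU%-8d" left-justifies each CPU number in an eight-column field, so
the headings stay aligned once CPU numbers reach two or more digits. A tiny
illustration:

    #include <stdio.h>

    int main(void)
    {
        int j;

        /* "CPU%-8d" pads each number to eight columns, left-justified */
        printf("           ");
        for (j = 0; j < 12; j += 5)
            printf("CPU%-8d", j);
        printf("\n");
        return 0;
    }
 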
 
index d43b498ec745e2ea335d1282df274311eac62234..a76e931465858657ac10ffecca2834ff75707cb6 100644 (file)
  */
 
 #include <linux/config.h>
-#include <linux/mm.h>
 #include <linux/delay.h>
-#include <linux/bootmem.h>
-#include <linux/smp_lock.h>
 #include <linux/interrupt.h>
-#include <linux/mc146818rtc.h>
-#include <linux/kernel_stat.h>
 #include <linux/module.h>
 #include <linux/nmi.h>
 #include <linux/sysdev.h>
 #include <linux/sysctl.h>
+#include <linux/percpu.h>
 
 #include <asm/smp.h>
-#include <asm/div64.h>
 #include <asm/nmi.h>
+#include <asm/intel_arch_perfmon.h>
 
 #include "mach_traps.h"
 
@@ -100,6 +96,9 @@ int nmi_active;
        (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT|     \
         P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
 
+#define ARCH_PERFMON_NMI_EVENT_SEL     ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
+#define ARCH_PERFMON_NMI_EVENT_UMASK   ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
+
 #ifdef CONFIG_SMP
 /* The performance counters used by NMI_LOCAL_APIC don't trigger when
  * the CPU is idle. To make sure the NMI watchdog really ticks on all
@@ -212,6 +211,8 @@ static int __init setup_nmi_watchdog(char *str)
 
 __setup("nmi_watchdog=", setup_nmi_watchdog);
 
+static void disable_intel_arch_watchdog(void);
+
 static void disable_lapic_nmi_watchdog(void)
 {
        if (nmi_active <= 0)
@@ -221,6 +222,10 @@ static void disable_lapic_nmi_watchdog(void)
                wrmsr(MSR_K7_EVNTSEL0, 0, 0);
                break;
        case X86_VENDOR_INTEL:
+               if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+                       disable_intel_arch_watchdog();
+                       break;
+               }
                switch (boot_cpu_data.x86) {
                case 6:
                        if (boot_cpu_data.x86_model > 0xd)
@@ -449,6 +454,53 @@ static int setup_p4_watchdog(void)
        return 1;
 }
 
+static void disable_intel_arch_watchdog(void)
+{
+       unsigned ebx;
+
+       /*
+        * Check whether the Architectural PerfMon supports
+        * Unhalted Core Cycles Event or not.
+        * NOTE: Corresponding bit = 0 in ebx indicates event present.
+        */
+       ebx = cpuid_ebx(10);
+       if (!(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
+               wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, 0, 0);
+}
+
+static int setup_intel_arch_watchdog(void)
+{
+       unsigned int evntsel;
+       unsigned ebx;
+
+       /*
+        * Check whether the Architectural PerfMon supports
+        * Unhalted Core Cycles Event or not.
+        * NOTE: Corresponding bit = 0 in ebx indicates event present.
+        */
+       ebx = cpuid_ebx(10);
+       if ((ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
+               return 0;
+
+       nmi_perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
+
+       clear_msr_range(MSR_ARCH_PERFMON_EVENTSEL0, 2);
+       clear_msr_range(MSR_ARCH_PERFMON_PERFCTR0, 2);
+
+       evntsel = ARCH_PERFMON_EVENTSEL_INT
+               | ARCH_PERFMON_EVENTSEL_OS
+               | ARCH_PERFMON_EVENTSEL_USR
+               | ARCH_PERFMON_NMI_EVENT_SEL
+               | ARCH_PERFMON_NMI_EVENT_UMASK;
+
+       wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0);
+       write_watchdog_counter("INTEL_ARCH_PERFCTR0");
+       apic_write(APIC_LVTPC, APIC_DM_NMI);
+       evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+       wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0);
+       return 1;
+}
+
 void setup_apic_nmi_watchdog (void)
 {
        switch (boot_cpu_data.x86_vendor) {
@@ -458,6 +510,11 @@ void setup_apic_nmi_watchdog (void)
                setup_k7_watchdog();
                break;
        case X86_VENDOR_INTEL:
+               if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+                       if (!setup_intel_arch_watchdog())
+                               return;
+                       break;
+               }
                switch (boot_cpu_data.x86) {
                case 6:
                        if (boot_cpu_data.x86_model > 0xd)
@@ -561,7 +618,8 @@ void nmi_watchdog_tick (struct pt_regs * regs)
                        wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
                        apic_write(APIC_LVTPC, APIC_DM_NMI);
                }
-               else if (nmi_perfctr_msr == MSR_P6_PERFCTR0) {
+               else if (nmi_perfctr_msr == MSR_P6_PERFCTR0 ||
+                        nmi_perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
                        /* Only P6-based Pentium M needs to re-unmask
                         * the apic vector, but it doesn't hurt
                         * other P6 variants */
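 
One subtlety in the arch-perfmon checks above: CPUID leaf 10 reports EBX bits
with inverted sense, where a set bit means the corresponding architectural
event is *not* available, hence the "bit = 0 ... present" comments. A
userspace sketch of the same test (the macro name here is illustrative; the
kernel calls its constant ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT):

    #include <stdio.h>
    #include <cpuid.h>

    /* CPUID leaf 10, EBX bit 0: SET means the event is NOT available */
    #define UNHALTED_CORE_CYCLES_UNAVAILABLE (1u << 0)

    int main(void)
    {
        unsigned eax, ebx, ecx, edx;

        if (!__get_cpuid(10, &eax, &ebx, &ecx, &edx))
            return 1;

        if (!(ebx & UNHALTED_CORE_CYCLES_UNAVAILABLE))
            printf("unhalted core cycles event present\n");
        else
            printf("unhalted core cycles event not available\n");
        return 0;
    }
 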
index 6259afea46d17db4eeda2ea1234c714540b4cadc..6946b06e278495cbbfa15383047e026ce7224120 100644 (file)
@@ -102,7 +102,7 @@ void default_idle(void)
        local_irq_enable();
 
        if (!hlt_counter && boot_cpu_data.hlt_works_ok) {
-               clear_thread_flag(TIF_POLLING_NRFLAG);
+               current_thread_info()->status &= ~TS_POLLING;
                smp_mb__after_clear_bit();
                while (!need_resched()) {
                        local_irq_disable();
@@ -111,7 +111,7 @@ void default_idle(void)
                        else
                                local_irq_enable();
                }
-               set_thread_flag(TIF_POLLING_NRFLAG);
+               current_thread_info()->status |= TS_POLLING;
        } else {
                while (!need_resched())
                        cpu_relax();
@@ -174,7 +174,7 @@ void cpu_idle(void)
 {
        int cpu = smp_processor_id();
 
-       set_thread_flag(TIF_POLLING_NRFLAG);
+       current_thread_info()->status |= TS_POLLING;
 
        /* endless idle loop with no priority at all */
        while (1) {
@@ -312,7 +312,7 @@ void show_regs(struct pt_regs * regs)
        cr3 = read_cr3();
        cr4 = read_cr4_safe();
        printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4);
-       show_trace(NULL, &regs->esp);
+       show_trace(NULL, regs, &regs->esp);
 }
 
 /*
index d134e9643a58a0303cb67672486709d9da244591..c10789d7a9d38673194cf0074fa564a9fd64612c 100644 (file)
@@ -114,7 +114,17 @@ DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_m
 
 static inline int __prepare_ICR (unsigned int shortcut, int vector)
 {
-       return APIC_DM_FIXED | shortcut | vector | APIC_DEST_LOGICAL;
+       unsigned int icr = shortcut | APIC_DEST_LOGICAL;
+
+       switch (vector) {
+       default:
+               icr |= APIC_DM_FIXED | vector;
+               break;
+       case NMI_VECTOR:
+               icr |= APIC_DM_NMI;
+               break;
+       }
+       return icr;
 }
 
 static inline int __prepare_ICR2 (unsigned int mask)
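For clarity, the new ICR encoding can be restated as a self-contained sketch with the relevant constants written out numerically (APIC_DEST_LOGICAL = 0x800, APIC_DM_FIXED = 0x000, APIC_DM_NMI = 0x400, NMI_VECTOR = 2; values assumed from contemporary headers):

	/* NMI is encoded purely as a delivery mode; the vector field is
	 * left empty rather than being filled with NMI_VECTOR's value. */
	static unsigned int prepare_icr(unsigned int shortcut, int vector)
	{
		unsigned int icr = shortcut | 0x800;	/* logical destination */

		if (vector == 2)			/* NMI_VECTOR */
			icr |= 0x400;			/* NMI delivery mode */
		else
			icr |= vector;			/* fixed delivery + vector */
		return icr;
	}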
index bd0ca5c9f05330daacdf89326db21fc4a708d17b..bce5470ecb42e6e91ac47510366676fe13637a57 100644 (file)
@@ -52,6 +52,7 @@
 #include <asm/tlbflush.h>
 #include <asm/desc.h>
 #include <asm/arch_hooks.h>
+#include <asm/nmi.h>
 
 #include <mach_apic.h>
 #include <mach_wakecpu.h>
index dcc14477af1f0a5d16e083252389ba1d74173eac..78464097470a282a8064e8b873b1edcdfa5a1da0 100644 (file)
@@ -28,6 +28,7 @@
 #include <linux/utsname.h>
 #include <linux/kprobes.h>
 #include <linux/kexec.h>
+#include <linux/unwind.h>
 
 #ifdef CONFIG_EISA
 #include <linux/ioport.h>
@@ -47,7 +48,7 @@
 #include <asm/desc.h>
 #include <asm/i387.h>
 #include <asm/nmi.h>
-
+#include <asm/unwind.h>
 #include <asm/smp.h>
 #include <asm/arch_hooks.h>
 #include <asm/kdebug.h>
@@ -92,6 +93,7 @@ asmlinkage void spurious_interrupt_bug(void);
 asmlinkage void machine_check(void);
 
 static int kstack_depth_to_print = 24;
+static int call_trace = 1;
 ATOMIC_NOTIFIER_HEAD(i386die_chain);
 
 int register_die_notifier(struct notifier_block *nb)
@@ -170,7 +172,23 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo,
        return ebp;
 }
 
-static void show_trace_log_lvl(struct task_struct *task,
+static asmlinkage int show_trace_unwind(struct unwind_frame_info *info, void *log_lvl)
+{
+       int n = 0;
+       int printed = 0; /* nr of entries already printed on current line */
+
+       while (unwind(info) == 0 && UNW_PC(info)) {
+               ++n;
+               printed = print_addr_and_symbol(UNW_PC(info), log_lvl, printed);
+               if (arch_unw_user_mode(info))
+                       break;
+       }
+       if (printed)
+               printk("\n");
+       return n;
+}
+
+static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
                               unsigned long *stack, char *log_lvl)
 {
        unsigned long ebp;
@@ -178,6 +196,26 @@ static void show_trace_log_lvl(struct task_struct *task,
        if (!task)
                task = current;
 
+       if (call_trace >= 0) {
+               int unw_ret = 0;
+               struct unwind_frame_info info;
+
+               if (regs) {
+                       if (unwind_init_frame_info(&info, task, regs) == 0)
+                               unw_ret = show_trace_unwind(&info, log_lvl);
+               } else if (task == current)
+                       unw_ret = unwind_init_running(&info, show_trace_unwind, log_lvl);
+               else {
+                       if (unwind_init_blocked(&info, task) == 0)
+                               unw_ret = show_trace_unwind(&info, log_lvl);
+               }
+               if (unw_ret > 0) {
+                       if (call_trace > 0)
+                               return;
+                       printk("%sLegacy call trace:\n", log_lvl);
+               }
+       }
+
        if (task == current) {
                /* Grab ebp right from our regs */
                asm ("movl %%ebp, %0" : "=r" (ebp) : );
@@ -198,13 +236,13 @@ static void show_trace_log_lvl(struct task_struct *task,
        }
 }
 
-void show_trace(struct task_struct *task, unsigned long * stack)
+void show_trace(struct task_struct *task, struct pt_regs *regs, unsigned long * stack)
 {
-       show_trace_log_lvl(task, stack, "");
+       show_trace_log_lvl(task, regs, stack, "");
 }
 
-static void show_stack_log_lvl(struct task_struct *task, unsigned long *esp,
-                              char *log_lvl)
+static void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
+                              unsigned long *esp, char *log_lvl)
 {
        unsigned long *stack;
        int i;
@@ -225,13 +263,13 @@ static void show_stack_log_lvl(struct task_struct *task, unsigned long *esp,
                printk("%08lx ", *stack++);
        }
        printk("\n%sCall Trace:\n", log_lvl);
-       show_trace_log_lvl(task, esp, log_lvl);
+       show_trace_log_lvl(task, regs, esp, log_lvl);
 }
 
 void show_stack(struct task_struct *task, unsigned long *esp)
 {
        printk("       ");
-       show_stack_log_lvl(task, esp, "");
+       show_stack_log_lvl(task, NULL, esp, "");
 }
 
 /*
@@ -241,7 +279,7 @@ void dump_stack(void)
 {
        unsigned long stack;
 
-       show_trace(current, &stack);
+       show_trace(current, NULL, &stack);
 }
 
 EXPORT_SYMBOL(dump_stack);
@@ -285,7 +323,7 @@ void show_registers(struct pt_regs *regs)
                u8 __user *eip;
 
                printk("\n" KERN_EMERG "Stack: ");
-               show_stack_log_lvl(NULL, (unsigned long *)esp, KERN_EMERG);
+               show_stack_log_lvl(NULL, regs, (unsigned long *)esp, KERN_EMERG);
 
                printk(KERN_EMERG "Code: ");
 
@@ -1215,3 +1253,15 @@ static int __init kstack_setup(char *s)
        return 1;
 }
 __setup("kstack=", kstack_setup);
+
+static int __init call_trace_setup(char *s)
+{
+       if (strcmp(s, "old") == 0)
+               call_trace = -1;
+       else if (strcmp(s, "both") == 0)
+               call_trace = 0;
+       else if (strcmp(s, "new") == 0)
+               call_trace = 1;
+       return 1;
+}
+__setup("call_trace=", call_trace_setup);
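A standalone restatement of the mapping this parser implements, matching the unwinder fallback logic in show_trace_log_lvl() above (a sketch):

	#include <string.h>

	/* call_trace= boot parameter -> internal flag:
	 *   "old"  -> -1: legacy EBP-chain trace only
	 *   "both" ->  0: DWARF unwinder output, then the legacy trace
	 *   "new"  ->  1: DWARF unwinder only (the default)
	 */
	static int parse_call_trace(const char *s)
	{
		if (strcmp(s, "old") == 0)
			return -1;
		if (strcmp(s, "both") == 0)
			return 0;
		return 1;
	}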
index 7512f39c9f250b8e3a7017e619a03f05f4b4aa3d..2d4f1386e2b159065b8949d7f5e7ecd624e50fc9 100644 (file)
@@ -71,6 +71,15 @@ SECTIONS
   .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { *(.data.read_mostly) }
   _edata = .;                  /* End of data section */
 
+#ifdef CONFIG_STACK_UNWIND
+  . = ALIGN(4);
+  .eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) {
+       __start_unwind = .;
+       *(.eh_frame)
+       __end_unwind = .;
+  }
+#endif
+
   . = ALIGN(THREAD_SIZE);      /* init_task */
   .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
        *(.data.init_task)
index 3ad9a72a5036b324d32a976b034dedd03fbc09f0..693bdea4a52b44f7a5800c599b9b8428f210664a 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/oprofile.h>
 #include <asm/ptrace.h>
 #include <asm/msr.h>
+#include <asm/nmi.h>
  
 #include "op_x86_model.h"
 #include "op_counter.h"
index ac8a066035c24a1f9a9cc3f3d63e57ce29eee33d..7c61d357b82bc10302e1143068c1fca8a9a058dc 100644 (file)
@@ -14,6 +14,7 @@
 #include <asm/ptrace.h>
 #include <asm/fixmap.h>
 #include <asm/apic.h>
+#include <asm/nmi.h>
 
 #include "op_x86_model.h"
 #include "op_counter.h"
index d719015fc0444a8e229448654f16d7351bb41798..5c3ab4b027ade11f9a32973c5e32df85af9abeeb 100644 (file)
@@ -14,6 +14,7 @@
 #include <asm/ptrace.h>
 #include <asm/msr.h>
 #include <asm/apic.h>
+#include <asm/nmi.h>
  
 #include "op_x86_model.h"
 #include "op_counter.h"
index 355d57970ba31cf3368aa61e5c32c92b81a85029..b045c279136c281bde5a67a1e7eef13dd58e846d 100644 (file)
@@ -272,9 +272,9 @@ cpu_idle (void)
        /* endless idle loop with no priority at all */
        while (1) {
                if (can_do_pal_halt)
-                       clear_thread_flag(TIF_POLLING_NRFLAG);
+                       current_thread_info()->status &= ~TS_POLLING;
                else
-                       set_thread_flag(TIF_POLLING_NRFLAG);
+                       current_thread_info()->status |= TS_POLLING;
 
                if (!need_resched()) {
                        void (*idle)(void);
index af44130f0d65f54f642d6556e1a826bffe3db659..ccc4a7fb97a370f8d56baf16e4eb1439a17621d3 100644 (file)
@@ -386,24 +386,45 @@ config HPET_EMULATE_RTC
        bool "Provide RTC interrupt"
        depends on HPET_TIMER && RTC=y
 
-config GART_IOMMU
-       bool "K8 GART IOMMU support"
+# Mark as embedded because too many people got it wrong.
+# The code disables itself when not needed.
+config IOMMU
+       bool "IOMMU support" if EMBEDDED
        default y
        select SWIOTLB
        select AGP
        depends on PCI
        help
-         Support for hardware IOMMU in AMD's Opteron/Athlon64 Processors
-         and for the bounce buffering software IOMMU.
-         Needed to run systems with more than 3GB of memory properly with
-         32-bit PCI devices that do not support DAC (Double Address Cycle).
-         The IOMMU can be turned off at runtime with the iommu=off parameter.
-         Normally the kernel will take the right choice by itself.
-         This option includes a driver for the AMD Opteron/Athlon64 IOMMU
-         northbridge and a software emulation used on other systems without
-         hardware IOMMU.  If unsure, say Y.
-
-# need this always selected by GART_IOMMU for the VIA workaround
+         Support for full DMA access by devices that can only address 32 bits
+         of memory, on systems with more than 3GB of RAM. This is usually
+         needed for USB, sound, many IDE/SATA chipsets and some other devices.
+         Provides a driver for the AMD Athlon64/Opteron/Turion/Sempron
+         GART-based IOMMU and a software bounce-buffer based IOMMU used on
+         Intel systems and as a fallback.
+         The code is only active when needed (more than 3GB of memory and a
+         DMA-limited device present), unless CONFIG_IOMMU_DEBUG or
+         iommu=force is specified.
+
+config CALGARY_IOMMU
+       bool "IBM Calgary IOMMU support"
+       default y
+       select SWIOTLB
+       depends on PCI && EXPERIMENTAL
+       help
+         Support for hardware IOMMUs in IBM's xSeries x366 and x460
+         systems. Needed to run systems with more than 3GB of memory
+         properly with 32-bit PCI devices that do not support DAC
+         (Double Address Cycle). Calgary also supports bus level
+         isolation, where all DMAs pass through the IOMMU.  This
+         prevents them from going anywhere except their intended
+         destination. This catches hard-to-find kernel bugs and
+         misbehaving drivers and devices that do not use the DMA-API
+         properly to set up their DMA buffers.  The IOMMU can be
+         turned off at boot time with the iommu=off parameter.
+         Normally the kernel will make the right choice by itself.
+         If unsure, say Y.
+
+# need this always selected by IOMMU for the VIA workaround
 config SWIOTLB
        bool
 
@@ -501,6 +522,10 @@ config REORDER
          optimal TLB usage. If you have pretty much any version of binutils, 
         this can increase your kernel build time by roughly one minute.
 
+config K8_NB
+       def_bool y
+       depends on AGP_AMD64 || IOMMU || (PCI && NUMA)
+
 endmenu
 
 #
index ea31b4c62105cfc808ce0278bee31228dd0bab23..1d92ab56c0f93fa06c4bf2b8f72bdbabb99ef782 100644 (file)
@@ -13,7 +13,7 @@ config DEBUG_RODATA
         If in doubt, say "N".
 
 config IOMMU_DEBUG
-       depends on GART_IOMMU && DEBUG_KERNEL
+       depends on IOMMU && DEBUG_KERNEL
        bool "Enable IOMMU debugging"
        help
          Force the IOMMU to on even when you have less than 4GB of
@@ -35,6 +35,22 @@ config IOMMU_LEAK
          Add a simple leak tracer to the IOMMU code. This is useful when you
         are debugging a buggy device driver that leaks IOMMU mappings.
 
+config DEBUG_STACKOVERFLOW
+        bool "Check for stack overflows"
+        depends on DEBUG_KERNEL
+        help
+         This option will cause messages to be printed if free stack space
+         drops below a certain limit.
+
+config DEBUG_STACK_USAGE
+        bool "Stack utilization instrumentation"
+        depends on DEBUG_KERNEL
+        help
+         Enables the display of the minimum amount of free stack which each
+         task has ever had available in the sysrq-T and sysrq-P debug output.
+
+         This option will slow down process creation somewhat.
+
 #config X86_REMOTE_DEBUG
 #       bool "kgdb debugging stub"
 
index e573e2ab55108fe2d2acf73fe16c950cc1b6b416..431bb4bc36cdc950b1a9168a3abfcb0b954eb373 100644 (file)
@@ -27,6 +27,7 @@ LDFLAGS_vmlinux :=
 CHECKFLAGS      += -D__x86_64__ -m64
 
 cflags-y       :=
+cflags-kernel-y        :=
 cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)
 cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
 cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
@@ -35,7 +36,7 @@ cflags-y += -m64
 cflags-y += -mno-red-zone
 cflags-y += -mcmodel=kernel
 cflags-y += -pipe
-cflags-$(CONFIG_REORDER) += -ffunction-sections
+cflags-kernel-$(CONFIG_REORDER) += -ffunction-sections
 # this makes reading assembly source easier, but produces worse code
 # (it actually makes the kernel smaller too)
 cflags-y += -fno-reorder-blocks
@@ -55,6 +56,7 @@ cflags-y += $(call cc-option,-funit-at-a-time)
 cflags-y += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,)
 
 CFLAGS += $(cflags-y)
+CFLAGS_KERNEL += $(cflags-kernel-y)
 AFLAGS += -m64
 
 head-y := arch/x86_64/kernel/head.o arch/x86_64/kernel/head64.o arch/x86_64/kernel/init_task.o
index 43ee6c50c2778ed4c55cf9ecb4afa0046802b3f8..deb063e7762debea68e06ba1ebdc4278e3523138 100644 (file)
@@ -107,8 +107,13 @@ fdimage288: $(BOOTIMAGE) $(obj)/mtools.conf
 isoimage: $(BOOTIMAGE)
        -rm -rf $(obj)/isoimage
        mkdir $(obj)/isoimage
-       cp `echo /usr/lib*/syslinux/isolinux.bin | awk '{ print $1; }'` \
-               $(obj)/isoimage
+       for i in lib lib64 share end ; do \
+               if [ -f /usr/$$i/syslinux/isolinux.bin ] ; then \
+                       cp /usr/$$i/syslinux/isolinux.bin $(obj)/isoimage ; \
+                       break ; \
+               fi ; \
+               if [ $$i = end ] ; then exit 1 ; fi ; \
+       done
        cp $(BOOTIMAGE) $(obj)/isoimage/linux
        echo '$(image_cmdline)' > $(obj)/isoimage/isolinux.cfg
        if [ -f '$(FDINITRD)' ] ; then \
index cf4b88c416dc749bd30ff87f61e053703e6210da..3755b2e394d048916c2dcf86bd1214fd5e3dd44f 100644 (file)
@@ -77,11 +77,11 @@ static void gzip_release(void **);
  */
 static unsigned char *real_mode; /* Pointer to real-mode data */
 
-#define EXT_MEM_K   (*(unsigned short *)(real_mode + 0x2))
+#define RM_EXT_MEM_K   (*(unsigned short *)(real_mode + 0x2))
 #ifndef STANDARD_MEMORY_BIOS_CALL
-#define ALT_MEM_K   (*(unsigned long *)(real_mode + 0x1e0))
+#define RM_ALT_MEM_K   (*(unsigned long *)(real_mode + 0x1e0))
 #endif
-#define SCREEN_INFO (*(struct screen_info *)(real_mode+0))
+#define RM_SCREEN_INFO (*(struct screen_info *)(real_mode+0))
 
 extern unsigned char input_data[];
 extern int input_len;
@@ -92,9 +92,9 @@ static unsigned long output_ptr = 0;
 
 static void *malloc(int size);
 static void free(void *where);
-void* memset(void* s, int c, unsigned n);
-void* memcpy(void* dest, const void* src, unsigned n);
+
+static void *memset(void *s, int c, unsigned n);
+static void *memcpy(void *dest, const void *src, unsigned n);
 
 static void putstr(const char *);
 
@@ -162,8 +162,8 @@ static void putstr(const char *s)
        int x,y,pos;
        char c;
 
-       x = SCREEN_INFO.orig_x;
-       y = SCREEN_INFO.orig_y;
+       x = RM_SCREEN_INFO.orig_x;
+       y = RM_SCREEN_INFO.orig_y;
 
        while ( ( c = *s++ ) != '\0' ) {
                if ( c == '\n' ) {
@@ -184,8 +184,8 @@ static void putstr(const char *s)
                }
        }
 
-       SCREEN_INFO.orig_x = x;
-       SCREEN_INFO.orig_y = y;
+       RM_SCREEN_INFO.orig_x = x;
+       RM_SCREEN_INFO.orig_y = y;
 
        pos = (x + cols * y) * 2;       /* Update cursor position */
        outb_p(14, vidport);
@@ -194,7 +194,7 @@ static void putstr(const char *s)
        outb_p(0xff & (pos >> 1), vidport+1);
 }
 
-void* memset(void* s, int c, unsigned n)
+static void* memset(void* s, int c, unsigned n)
 {
        int i;
        char *ss = (char*)s;
@@ -203,7 +203,7 @@ void* memset(void* s, int c, unsigned n)
        return s;
 }
 
-void* memcpy(void* dest, const void* src, unsigned n)
+static void* memcpy(void* dest, const void* src, unsigned n)
 {
        int i;
        char *d = (char *)dest, *s = (char *)src;
@@ -278,15 +278,15 @@ static void error(char *x)
        putstr(x);
        putstr("\n\n -- System halted");
 
-       while(1);
+       while(1);       /* Halt */
 }
 
-void setup_normal_output_buffer(void)
+static void setup_normal_output_buffer(void)
 {
 #ifdef STANDARD_MEMORY_BIOS_CALL
-       if (EXT_MEM_K < 1024) error("Less than 2MB of memory");
+       if (RM_EXT_MEM_K < 1024) error("Less than 2MB of memory");
 #else
-       if ((ALT_MEM_K > EXT_MEM_K ? ALT_MEM_K : EXT_MEM_K) < 1024) error("Less than 2MB of memory");
+       if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < 1024) error("Less than 2MB of memory");
 #endif
        output_data = (unsigned char *)__PHYSICAL_START; /* Normally Points to 1M */
        free_mem_end_ptr = (long)real_mode;
@@ -297,13 +297,13 @@ struct moveparams {
        uch *high_buffer_start; int hcount;
 };
 
-void setup_output_buffer_if_we_run_high(struct moveparams *mv)
+static void setup_output_buffer_if_we_run_high(struct moveparams *mv)
 {
        high_buffer_start = (uch *)(((ulg)&end) + HEAP_SIZE);
 #ifdef STANDARD_MEMORY_BIOS_CALL
-       if (EXT_MEM_K < (3*1024)) error("Less than 4MB of memory");
+       if (RM_EXT_MEM_K < (3*1024)) error("Less than 4MB of memory");
 #else
-       if ((ALT_MEM_K > EXT_MEM_K ? ALT_MEM_K : EXT_MEM_K) < (3*1024)) error("Less than 4MB of memory");
+       if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < (3*1024)) error("Less than 4MB of memory");
 #endif 
        mv->low_buffer_start = output_data = (unsigned char *)LOW_BUFFER_START;
        low_buffer_end = ((unsigned int)real_mode > LOW_BUFFER_MAX
@@ -319,7 +319,7 @@ void setup_output_buffer_if_we_run_high(struct moveparams *mv)
        mv->high_buffer_start = high_buffer_start;
 }
 
-void close_output_buffer_if_we_run_high(struct moveparams *mv)
+static void close_output_buffer_if_we_run_high(struct moveparams *mv)
 {
        if (bytes_out > low_buffer_size) {
                mv->lcount = low_buffer_size;
@@ -335,7 +335,7 @@ int decompress_kernel(struct moveparams *mv, void *rmode)
 {
        real_mode = rmode;
 
-       if (SCREEN_INFO.orig_video_mode == 7) {
+       if (RM_SCREEN_INFO.orig_video_mode == 7) {
                vidmem = (char *) 0xb0000;
                vidport = 0x3b4;
        } else {
@@ -343,8 +343,8 @@ int decompress_kernel(struct moveparams *mv, void *rmode)
                vidport = 0x3d4;
        }
 
-       lines = SCREEN_INFO.orig_video_lines;
-       cols = SCREEN_INFO.orig_video_cols;
+       lines = RM_SCREEN_INFO.orig_video_lines;
+       cols = RM_SCREEN_INFO.orig_video_cols;
 
        if (free_mem_ptr < 0x100000) setup_normal_output_buffer();
        else setup_output_buffer_if_we_run_high(mv);
index c44f5e2ec1006fc93d09c7b958ccd5d4cb0ad709..eae86691709a28f86db3711061897d12822402fc 100644 (file)
@@ -149,10 +149,8 @@ int main(int argc, char ** argv)
        sz = sb.st_size;
        fprintf (stderr, "System is %d kB\n", sz/1024);
        sys_size = (sz + 15) / 16;
-       /* 0x40000*16 = 4.0 MB, reasonable estimate for the current maximum */
-       if (sys_size > (is_big_kernel ? 0x40000 : DEF_SYSSIZE))
-               die("System is too big. Try using %smodules.",
-                       is_big_kernel ? "" : "bzImage or ");
+       if (!is_big_kernel && sys_size > DEF_SYSSIZE)
+               die("System is too big. Try using bzImage or modules.");
        while (sz > 0) {
                int l, n;
 
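A quick worked example of the size check (a sketch; DEF_SYSSIZE = 0x7f00 sixteen-byte paragraphs, roughly 508 KB, is an assumption from this era's boot headers):

	#include <stdio.h>

	int main(void)
	{
		unsigned int sz = 1536 * 1024;		/* a 1.5 MB system image */
		unsigned int sys_size = (sz + 15) / 16;	/* 16-byte paragraphs */

		printf("sys_size = %u paragraphs\n", sys_size);	/* 98304 */
		if (sys_size > 0x7f00)			/* DEF_SYSSIZE */
			printf("too big for zImage; use bzImage or modules\n");
		return 0;
	}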
index 69db0c0721d1c4835ff02fed98ff662c0db1665c..e69d403949c872767c6e0ae45db1d2951259b444 100644 (file)
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.17-rc1-git11
-# Sun Apr 16 07:22:36 2006
+# Linux kernel version: 2.6.17-git6
+# Sat Jun 24 00:52:28 2006
 #
 CONFIG_X86_64=y
 CONFIG_64BIT=y
@@ -42,7 +42,6 @@ CONFIG_IKCONFIG_PROC=y
 # CONFIG_RELAY is not set
 CONFIG_INITRAMFS_SOURCE=""
 CONFIG_UID16=y
-CONFIG_VM86=y
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 # CONFIG_EMBEDDED is not set
 CONFIG_KALLSYMS=y
@@ -57,7 +56,6 @@ CONFIG_FUTEX=y
 CONFIG_EPOLL=y
 CONFIG_SHMEM=y
 CONFIG_SLAB=y
-CONFIG_DOUBLEFAULT=y
 # CONFIG_TINY_SHMEM is not set
 CONFIG_BASE_SMALL=0
 # CONFIG_SLOB is not set
@@ -144,7 +142,8 @@ CONFIG_NR_CPUS=32
 CONFIG_HOTPLUG_CPU=y
 CONFIG_HPET_TIMER=y
 CONFIG_HPET_EMULATE_RTC=y
-CONFIG_GART_IOMMU=y
+CONFIG_IOMMU=y
+# CONFIG_CALGARY_IOMMU is not set
 CONFIG_SWIOTLB=y
 CONFIG_X86_MCE=y
 CONFIG_X86_MCE_INTEL=y
@@ -158,6 +157,7 @@ CONFIG_HZ_250=y
 # CONFIG_HZ_1000 is not set
 CONFIG_HZ=250
 # CONFIG_REORDER is not set
+CONFIG_K8_NB=y
 CONFIG_GENERIC_HARDIRQS=y
 CONFIG_GENERIC_IRQ_PROBE=y
 CONFIG_ISA_DMA_API=y
@@ -293,6 +293,8 @@ CONFIG_IP_PNP_DHCP=y
 # CONFIG_INET_IPCOMP is not set
 # CONFIG_INET_XFRM_TUNNEL is not set
 # CONFIG_INET_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
 CONFIG_INET_DIAG=y
 CONFIG_INET_TCP_DIAG=y
 # CONFIG_TCP_CONG_ADVANCED is not set
@@ -305,7 +307,10 @@ CONFIG_IPV6=y
 # CONFIG_INET6_IPCOMP is not set
 # CONFIG_INET6_XFRM_TUNNEL is not set
 # CONFIG_INET6_TUNNEL is not set
+# CONFIG_INET6_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET6_XFRM_MODE_TUNNEL is not set
 # CONFIG_IPV6_TUNNEL is not set
+# CONFIG_NETWORK_SECMARK is not set
 # CONFIG_NETFILTER is not set
 
 #
@@ -344,6 +349,7 @@ CONFIG_IPV6=y
 # Network testing
 #
 # CONFIG_NET_PKTGEN is not set
+# CONFIG_NET_TCPPROBE is not set
 # CONFIG_HAMRADIO is not set
 # CONFIG_IRDA is not set
 # CONFIG_BT is not set
@@ -360,6 +366,7 @@ CONFIG_STANDALONE=y
 CONFIG_PREVENT_FIRMWARE_BUILD=y
 CONFIG_FW_LOADER=y
 # CONFIG_DEBUG_DRIVER is not set
+# CONFIG_SYS_HYPERVISOR is not set
 
 #
 # Connector - unified userspace <-> kernelspace linker
@@ -526,6 +533,7 @@ CONFIG_SCSI_ATA_PIIX=y
 # CONFIG_SCSI_SATA_MV is not set
 CONFIG_SCSI_SATA_NV=y
 # CONFIG_SCSI_PDC_ADMA is not set
+# CONFIG_SCSI_HPTIOP is not set
 # CONFIG_SCSI_SATA_QSTOR is not set
 # CONFIG_SCSI_SATA_PROMISE is not set
 # CONFIG_SCSI_SATA_SX4 is not set
@@ -591,10 +599,7 @@ CONFIG_IEEE1394=y
 #
 # Device Drivers
 #
-
-#
-# Texas Instruments PCILynx requires I2C
-#
+# CONFIG_IEEE1394_PCILYNX is not set
 CONFIG_IEEE1394_OHCI1394=y
 
 #
@@ -645,7 +650,16 @@ CONFIG_VORTEX=y
 #
 # Tulip family network device support
 #
-# CONFIG_NET_TULIP is not set
+CONFIG_NET_TULIP=y
+# CONFIG_DE2104X is not set
+CONFIG_TULIP=y
+# CONFIG_TULIP_MWI is not set
+# CONFIG_TULIP_MMIO is not set
+# CONFIG_TULIP_NAPI is not set
+# CONFIG_DE4X5 is not set
+# CONFIG_WINBOND_840 is not set
+# CONFIG_DM9102 is not set
+# CONFIG_ULI526X is not set
 # CONFIG_HP100 is not set
 CONFIG_NET_PCI=y
 # CONFIG_PCNET32 is not set
@@ -697,6 +711,7 @@ CONFIG_TIGON3=y
 # CONFIG_IXGB is not set
 CONFIG_S2IO=m
 # CONFIG_S2IO_NAPI is not set
+# CONFIG_MYRI10GE is not set
 
 #
 # Token Ring devices
@@ -887,7 +902,56 @@ CONFIG_HPET_MMAP=y
 #
 # I2C support
 #
-# CONFIG_I2C is not set
+CONFIG_I2C=m
+CONFIG_I2C_CHARDEV=m
+
+#
+# I2C Algorithms
+#
+# CONFIG_I2C_ALGOBIT is not set
+# CONFIG_I2C_ALGOPCF is not set
+# CONFIG_I2C_ALGOPCA is not set
+
+#
+# I2C Hardware Bus support
+#
+# CONFIG_I2C_ALI1535 is not set
+# CONFIG_I2C_ALI1563 is not set
+# CONFIG_I2C_ALI15X3 is not set
+# CONFIG_I2C_AMD756 is not set
+# CONFIG_I2C_AMD8111 is not set
+# CONFIG_I2C_I801 is not set
+# CONFIG_I2C_I810 is not set
+# CONFIG_I2C_PIIX4 is not set
+CONFIG_I2C_ISA=m
+# CONFIG_I2C_NFORCE2 is not set
+# CONFIG_I2C_OCORES is not set
+# CONFIG_I2C_PARPORT_LIGHT is not set
+# CONFIG_I2C_PROSAVAGE is not set
+# CONFIG_I2C_SAVAGE4 is not set
+# CONFIG_I2C_SIS5595 is not set
+# CONFIG_I2C_SIS630 is not set
+# CONFIG_I2C_SIS96X is not set
+# CONFIG_I2C_STUB is not set
+# CONFIG_I2C_VIA is not set
+# CONFIG_I2C_VIAPRO is not set
+# CONFIG_I2C_VOODOO3 is not set
+# CONFIG_I2C_PCA_ISA is not set
+
+#
+# Miscellaneous I2C Chip support
+#
+# CONFIG_SENSORS_DS1337 is not set
+# CONFIG_SENSORS_DS1374 is not set
+# CONFIG_SENSORS_EEPROM is not set
+# CONFIG_SENSORS_PCF8574 is not set
+# CONFIG_SENSORS_PCA9539 is not set
+# CONFIG_SENSORS_PCF8591 is not set
+# CONFIG_SENSORS_MAX6875 is not set
+# CONFIG_I2C_DEBUG_CORE is not set
+# CONFIG_I2C_DEBUG_ALGO is not set
+# CONFIG_I2C_DEBUG_BUS is not set
+# CONFIG_I2C_DEBUG_CHIP is not set
 
 #
 # SPI support
@@ -898,14 +962,51 @@ CONFIG_HPET_MMAP=y
 #
 # Dallas's 1-wire bus
 #
-# CONFIG_W1 is not set
 
 #
 # Hardware Monitoring support
 #
 CONFIG_HWMON=y
 # CONFIG_HWMON_VID is not set
+# CONFIG_SENSORS_ABITUGURU is not set
+# CONFIG_SENSORS_ADM1021 is not set
+# CONFIG_SENSORS_ADM1025 is not set
+# CONFIG_SENSORS_ADM1026 is not set
+# CONFIG_SENSORS_ADM1031 is not set
+# CONFIG_SENSORS_ADM9240 is not set
+# CONFIG_SENSORS_ASB100 is not set
+# CONFIG_SENSORS_ATXP1 is not set
+# CONFIG_SENSORS_DS1621 is not set
 # CONFIG_SENSORS_F71805F is not set
+# CONFIG_SENSORS_FSCHER is not set
+# CONFIG_SENSORS_FSCPOS is not set
+# CONFIG_SENSORS_GL518SM is not set
+# CONFIG_SENSORS_GL520SM is not set
+# CONFIG_SENSORS_IT87 is not set
+# CONFIG_SENSORS_LM63 is not set
+# CONFIG_SENSORS_LM75 is not set
+# CONFIG_SENSORS_LM77 is not set
+# CONFIG_SENSORS_LM78 is not set
+# CONFIG_SENSORS_LM80 is not set
+# CONFIG_SENSORS_LM83 is not set
+# CONFIG_SENSORS_LM85 is not set
+# CONFIG_SENSORS_LM87 is not set
+# CONFIG_SENSORS_LM90 is not set
+# CONFIG_SENSORS_LM92 is not set
+# CONFIG_SENSORS_MAX1619 is not set
+# CONFIG_SENSORS_PC87360 is not set
+# CONFIG_SENSORS_SIS5595 is not set
+# CONFIG_SENSORS_SMSC47M1 is not set
+# CONFIG_SENSORS_SMSC47M192 is not set
+CONFIG_SENSORS_SMSC47B397=m
+# CONFIG_SENSORS_VIA686A is not set
+# CONFIG_SENSORS_VT8231 is not set
+# CONFIG_SENSORS_W83781D is not set
+# CONFIG_SENSORS_W83791D is not set
+# CONFIG_SENSORS_W83792D is not set
+# CONFIG_SENSORS_W83L785TS is not set
+# CONFIG_SENSORS_W83627HF is not set
+# CONFIG_SENSORS_W83627EHF is not set
 # CONFIG_SENSORS_HDAPS is not set
 # CONFIG_HWMON_DEBUG_CHIP is not set
 
@@ -918,6 +1019,7 @@ CONFIG_HWMON=y
 # Multimedia devices
 #
 # CONFIG_VIDEO_DEV is not set
+CONFIG_VIDEO_V4L2=y
 
 #
 # Digital Video Broadcasting Devices
@@ -953,28 +1055,17 @@ CONFIG_SOUND=y
 # Open Sound System
 #
 CONFIG_SOUND_PRIME=y
-CONFIG_OBSOLETE_OSS_DRIVER=y
 # CONFIG_SOUND_BT878 is not set
-# CONFIG_SOUND_CMPCI is not set
 # CONFIG_SOUND_EMU10K1 is not set
 # CONFIG_SOUND_FUSION is not set
-# CONFIG_SOUND_CS4281 is not set
-# CONFIG_SOUND_ES1370 is not set
 # CONFIG_SOUND_ES1371 is not set
-# CONFIG_SOUND_ESSSOLO1 is not set
-# CONFIG_SOUND_MAESTRO is not set
-# CONFIG_SOUND_MAESTRO3 is not set
 CONFIG_SOUND_ICH=y
-# CONFIG_SOUND_SONICVIBES is not set
 # CONFIG_SOUND_TRIDENT is not set
 # CONFIG_SOUND_MSNDCLAS is not set
 # CONFIG_SOUND_MSNDPIN is not set
 # CONFIG_SOUND_VIA82CXXX is not set
 # CONFIG_SOUND_OSS is not set
-# CONFIG_SOUND_ALI5455 is not set
-# CONFIG_SOUND_FORTE is not set
-# CONFIG_SOUND_RME96XX is not set
-# CONFIG_SOUND_AD1980 is not set
+# CONFIG_SOUND_TVMIXER is not set
 
 #
 # USB support
@@ -1000,6 +1091,7 @@ CONFIG_USB_DEVICEFS=y
 CONFIG_USB_EHCI_HCD=y
 # CONFIG_USB_EHCI_SPLIT_ISO is not set
 # CONFIG_USB_EHCI_ROOT_HUB_TT is not set
+# CONFIG_USB_EHCI_TT_NEWSCHED is not set
 # CONFIG_USB_ISP116X_HCD is not set
 CONFIG_USB_OHCI_HCD=y
 # CONFIG_USB_OHCI_BIG_ENDIAN is not set
@@ -1089,10 +1181,12 @@ CONFIG_USB_MON=y
 # CONFIG_USB_LEGOTOWER is not set
 # CONFIG_USB_LCD is not set
 # CONFIG_USB_LED is not set
+# CONFIG_USB_CY7C63 is not set
 # CONFIG_USB_CYTHERM is not set
 # CONFIG_USB_PHIDGETKIT is not set
 # CONFIG_USB_PHIDGETSERVO is not set
 # CONFIG_USB_IDMOUSE is not set
+# CONFIG_USB_APPLEDISPLAY is not set
 # CONFIG_USB_SISUSBVGA is not set
 # CONFIG_USB_LD is not set
 # CONFIG_USB_TEST is not set
@@ -1140,6 +1234,19 @@ CONFIG_USB_MON=y
 #
 # CONFIG_RTC_CLASS is not set
 
+#
+# DMA Engine support
+#
+# CONFIG_DMA_ENGINE is not set
+
+#
+# DMA Clients
+#
+
+#
+# DMA Devices
+#
+
 #
 # Firmware Drivers
 #
@@ -1175,6 +1282,7 @@ CONFIG_FS_POSIX_ACL=y
 # CONFIG_MINIX_FS is not set
 # CONFIG_ROMFS_FS is not set
 CONFIG_INOTIFY=y
+CONFIG_INOTIFY_USER=y
 # CONFIG_QUOTA is not set
 CONFIG_DNOTIFY=y
 CONFIG_AUTOFS_FS=y
@@ -1331,7 +1439,8 @@ CONFIG_DETECT_SOFTLOCKUP=y
 CONFIG_DEBUG_FS=y
 # CONFIG_DEBUG_VM is not set
 # CONFIG_FRAME_POINTER is not set
-# CONFIG_UNWIND_INFO is not set
+CONFIG_UNWIND_INFO=y
+CONFIG_STACK_UNWIND=y
 # CONFIG_FORCED_INLINING is not set
 # CONFIG_RCU_TORTURE_TEST is not set
 # CONFIG_DEBUG_RODATA is not set
index 1c23095f18130943eefaccf4ee099398e975ce1e..2c8209a3605ad4864b1c3d8af3653493236efa9a 100644 (file)
@@ -2,7 +2,6 @@
  * Copyright 2002 Andi Kleen, SuSE Labs.
  * FXSAVE<->i387 conversion support. Based on code by Gareth Hughes.
  * This is used for ptrace, signals and coredumps in 32bit emulation.
- * $Id: fpu32.c,v 1.1 2002/03/21 14:16:32 ak Exp $
  */ 
 
 #include <linux/sched.h>
index e0a92439f6342a350d45feaca527ed5b9e9e3426..25e5ca22204c5007c3b523413f59785d49c0a8e3 100644 (file)
@@ -6,8 +6,6 @@
  *  1997-11-28  Modified for POSIX.1b signals by Richard Henderson
  *  2000-06-20  Pentium III FXSR, SSE support by Gareth Hughes
  *  2000-12-*   x86-64 compatibility mode signal handling by Andi Kleen
- * 
- *  $Id: ia32_signal.c,v 1.22 2002/07/29 10:34:03 ak Exp $
  */
 
 #include <linux/sched.h>
index 4ec594ab1a9819777f7c58337cc3741b05274c42..c536fa98ea37769c450bbc8b84e4807d25a9834f 100644 (file)
@@ -155,6 +155,7 @@ sysenter_tracesys:
        .previous
        jmp     sysenter_do_call
        CFI_ENDPROC
+ENDPROC(ia32_sysenter_target)
 
 /*
  * 32bit SYSCALL instruction entry.
@@ -178,7 +179,7 @@ sysenter_tracesys:
  */    
 ENTRY(ia32_cstar_target)
        CFI_STARTPROC32 simple
-       CFI_DEF_CFA     rsp,0
+       CFI_DEF_CFA     rsp,PDA_STACKOFFSET
        CFI_REGISTER    rip,rcx
        /*CFI_REGISTER  rflags,r11*/
        swapgs
@@ -249,6 +250,7 @@ cstar_tracesys:
        .quad 1b,ia32_badarg
        .previous
        jmp cstar_do_call
+END(ia32_cstar_target)
                                
 ia32_badarg:
        movq $-EFAULT,%rax
@@ -314,16 +316,13 @@ ia32_tracesys:
        LOAD_ARGS ARGOFFSET  /* reload args from stack in case ptrace changed it */
        RESTORE_REST
        jmp ia32_do_syscall
+END(ia32_syscall)
 
 ia32_badsys:
        movq $0,ORIG_RAX-ARGOFFSET(%rsp)
        movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
        jmp int_ret_from_sys_call
 
-ni_syscall:
-       movq %rax,%rdi
-       jmp  sys32_ni_syscall                   
-
 quiet_ni_syscall:
        movq $-ENOSYS,%rax
        ret
@@ -370,10 +369,10 @@ ENTRY(ia32_ptregs_common)
        RESTORE_REST
        jmp  ia32_sysret        /* misbalances the return cache */
        CFI_ENDPROC
+END(ia32_ptregs_common)
 
        .section .rodata,"a"
        .align 8
-       .globl ia32_sys_call_table
 ia32_sys_call_table:
        .quad sys_restart_syscall
        .quad sys_exit
index 23a4515a73b4024c9fb4254ec769e465e0c6fec8..a590b7a0d92d46f009b1eab519fa5effe7185279 100644 (file)
@@ -7,8 +7,6 @@
  * 
  * This allows access to 64bit processes too, but there is no way to see the extended
  * register contents.
- *
- * $Id: ptrace32.c,v 1.16 2003/03/14 16:06:35 ak Exp $
  */ 
 
 #include <linux/kernel.h>
@@ -27,6 +25,7 @@
 #include <asm/debugreg.h>
 #include <asm/i387.h>
 #include <asm/fpu32.h>
+#include <asm/ia32.h>
 
 /*
  * Determines which flags the user has access to [1 = access, 0 = no access].
@@ -199,6 +198,24 @@ static int getreg32(struct task_struct *child, unsigned regno, u32 *val)
 
 #undef R32
 
+static long ptrace32_siginfo(unsigned request, u32 pid, u32 addr, u32 data)
+{
+       int ret;
+       compat_siginfo_t *si32 = (compat_siginfo_t *)compat_ptr(data);
+       siginfo_t *si = compat_alloc_user_space(sizeof(siginfo_t));
+       if (request == PTRACE_SETSIGINFO) {
+               ret = copy_siginfo_from_user32(si, si32);
+               if (ret)
+                       return ret;
+       }
+       ret = sys_ptrace(request, pid, addr, (unsigned long)si);
+       if (ret)
+               return ret;
+       if (request == PTRACE_GETSIGINFO)
+               ret = copy_siginfo_to_user32(si32, si);
+       return ret;
+}
+
 asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
 {
        struct task_struct *child;
@@ -208,9 +225,19 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
        __u32 val;
 
        switch (request) { 
-       default:
+       case PTRACE_TRACEME:
+       case PTRACE_ATTACH:
+       case PTRACE_KILL:
+       case PTRACE_CONT:
+       case PTRACE_SINGLESTEP:
+       case PTRACE_DETACH:
+       case PTRACE_SYSCALL:
+       case PTRACE_SETOPTIONS:
                return sys_ptrace(request, pid, addr, data); 
 
+       default:
+               return -EINVAL;
+
        case PTRACE_PEEKTEXT:
        case PTRACE_PEEKDATA:
        case PTRACE_POKEDATA:
@@ -225,10 +252,11 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
        case PTRACE_GETFPXREGS:
        case PTRACE_GETEVENTMSG:
                break;
-       } 
 
-       if (request == PTRACE_TRACEME)
-               return ptrace_traceme();
+       case PTRACE_SETSIGINFO:
+       case PTRACE_GETSIGINFO:
+               return ptrace32_siginfo(request, pid, addr, data);
+       }
 
        child = ptrace_get_task_struct(pid);
        if (IS_ERR(child))
@@ -349,8 +377,7 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
                break;
 
        default:
-               ret = -EINVAL;
-               break;
+               BUG();
        }
 
  out:
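A hypothetical 32-bit tracer exercising the newly forwarded siginfo requests might look like the sketch below (error handling omitted; the compat path above converts between the 32-bit and 64-bit siginfo layouts):

	#include <stdio.h>
	#include <signal.h>
	#include <sys/ptrace.h>
	#include <sys/types.h>

	/* After the tracee stops with a signal, fetch and display its siginfo. */
	static void dump_pending_signal(pid_t pid)
	{
		siginfo_t si;

		if (ptrace(PTRACE_GETSIGINFO, pid, 0, &si) == 0)
			printf("signo=%d code=%d\n", si.si_signo, si.si_code);
	}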
index f182b20858e25e02adca0a93db6ce55c2f6afec1..dc88154c412b34ee654b801b0828edcbcb35ef4b 100644 (file)
@@ -508,19 +508,6 @@ sys32_waitpid(compat_pid_t pid, unsigned int *stat_addr, int options)
        return compat_sys_wait4(pid, stat_addr, options, NULL);
 }
 
-int sys32_ni_syscall(int call)
-{ 
-       struct task_struct *me = current;
-       static char lastcomm[sizeof(me->comm)];
-
-       if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) {
-               printk(KERN_INFO "IA32 syscall %d from %s not implemented\n",
-                      call, me->comm);
-               strncpy(lastcomm, me->comm, sizeof(lastcomm));
-       } 
-       return -ENOSYS;        
-} 
-
 /* 32-bit timeval and related flotsam.  */
 
 asmlinkage long
@@ -916,7 +903,7 @@ long sys32_vm86_warning(void)
        struct task_struct *me = current;
        static char lastcomm[sizeof(me->comm)];
        if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) {
-               printk(KERN_INFO "%s: vm86 mode not supported on 64 bit kernel\n",
+               compat_printk(KERN_INFO "%s: vm86 mode not supported on 64 bit kernel\n",
                       me->comm);
                strncpy(lastcomm, me->comm, sizeof(lastcomm));
        } 
@@ -929,13 +916,3 @@ long sys32_lookup_dcookie(u32 addr_low, u32 addr_high,
        return sys_lookup_dcookie(((u64)addr_high << 32) | addr_low, buf, len);
 }
 
-static int __init ia32_init (void)
-{
-       printk("IA32 emulation $Id: sys_ia32.c,v 1.32 2002/03/24 13:02:28 ak Exp $\n");  
-       return 0;
-}
-
-__initcall(ia32_init);
-
-extern unsigned long ia32_sys_call_table[];
-EXPORT_SYMBOL(ia32_sys_call_table);
index 059c88313f4e03bde89e25835950bf4558acd611..aeb9c560be88aef2fd7141c792221ed85f92266e 100644 (file)
@@ -8,7 +8,7 @@ obj-y   := process.o signal.o entry.o traps.o irq.o \
                ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \
                x8664_ksyms.o i387.o syscall.o vsyscall.o \
                setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \
-               pci-dma.o pci-nommu.o
+               pci-dma.o pci-nommu.o alternative.o
 
 obj-$(CONFIG_X86_MCE)         += mce.o
 obj-$(CONFIG_X86_MCE_INTEL)    += mce_intel.o
@@ -28,11 +28,13 @@ obj-$(CONFIG_PM)            += suspend.o
 obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend_asm.o
 obj-$(CONFIG_CPU_FREQ)         += cpufreq/
 obj-$(CONFIG_EARLY_PRINTK)     += early_printk.o
-obj-$(CONFIG_GART_IOMMU)       += pci-gart.o aperture.o
+obj-$(CONFIG_IOMMU)            += pci-gart.o aperture.o
+obj-$(CONFIG_CALGARY_IOMMU)    += pci-calgary.o tce.o
 obj-$(CONFIG_SWIOTLB)          += pci-swiotlb.o
 obj-$(CONFIG_KPROBES)          += kprobes.o
 obj-$(CONFIG_X86_PM_TIMER)     += pmtimer.o
 obj-$(CONFIG_X86_VSMP)         += vsmp.o
+obj-$(CONFIG_K8_NB)            += k8.o
 
 obj-$(CONFIG_MODULES)          += module.o
 
@@ -49,3 +51,5 @@ intel_cacheinfo-y             += ../../i386/kernel/cpu/intel_cacheinfo.o
 quirks-y                       += ../../i386/kernel/quirks.o
 i8237-y                                += ../../i386/kernel/i8237.o
 msr-$(subst m,y,$(CONFIG_X86_MSR))  += ../../i386/kernel/msr.o
+alternative-y                  += ../../i386/kernel/alternative.o
+
index 70b9d21ed675b1aabe1874d785fa5f0b98e3b87d..a195ef06ec5553e7c9f3fa7eb12fbb4078bd826c 100644 (file)
@@ -8,7 +8,6 @@
  * because only the bootmem allocator can allocate 32+MB. 
  * 
  * Copyright 2002 Andi Kleen, SuSE Labs.
- * $Id: aperture.c,v 1.7 2003/08/01 03:36:18 ak Exp $
  */
 #include <linux/config.h>
 #include <linux/kernel.h>
@@ -24,6 +23,7 @@
 #include <asm/proto.h>
 #include <asm/pci-direct.h>
 #include <asm/dma.h>
+#include <asm/k8.h>
 
 int iommu_aperture;
 int iommu_aperture_disabled __initdata = 0;
@@ -37,8 +37,6 @@ int fix_aperture __initdata = 1;
 /* This code runs before the PCI subsystem is initialized, so just
    access the northbridge directly. */
 
-#define NB_ID_3 (PCI_VENDOR_ID_AMD | (0x1103<<16))
-
 static u32 __init allocate_aperture(void) 
 {
        pg_data_t *nd0 = NODE_DATA(0);
@@ -68,20 +66,20 @@ static u32 __init allocate_aperture(void)
        return (u32)__pa(p); 
 }
 
-static int __init aperture_valid(char *name, u64 aper_base, u32 aper_size) 
+static int __init aperture_valid(u64 aper_base, u32 aper_size)
 { 
        if (!aper_base) 
                return 0;
        if (aper_size < 64*1024*1024) { 
-               printk("Aperture from %s too small (%d MB)\n", name, aper_size>>20); 
+               printk("Aperture too small (%d MB)\n", aper_size>>20);
                return 0;
        }
        if (aper_base + aper_size >= 0xffffffff) { 
-               printk("Aperture from %s beyond 4GB. Ignoring.\n",name);
+               printk("Aperture beyond 4GB. Ignoring.\n");
                return 0; 
        }
        if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) {
-               printk("Aperture from %s pointing to e820 RAM. Ignoring.\n",name);
+               printk("Aperture pointing to e820 RAM. Ignoring.\n");
                return 0; 
        } 
        return 1;
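Restated as a self-contained sketch, the simplified validity rules are (the in-kernel version additionally rejects apertures that overlap e820 RAM):

	/* An aperture is usable iff it is programmed, at least 64 MB,
	 * and sits entirely below 4 GB. */
	static int aperture_ok(unsigned long long base, unsigned int size)
	{
		if (!base)
			return 0;		/* nothing programmed */
		if (size < 64u * 1024 * 1024)
			return 0;		/* too small for the GART */
		if (base + size >= 0xffffffffULL)
			return 0;		/* reaches the 4 GB boundary */
		return 1;
	}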
@@ -140,7 +138,7 @@ static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order)
        printk("Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n", 
               aper, 32 << *order, apsizereg);
 
-       if (!aperture_valid("AGP bridge", aper, (32*1024*1024) << *order))
+       if (!aperture_valid(aper, (32*1024*1024) << *order))
            return 0;
        return (u32)aper; 
 } 
@@ -208,10 +206,10 @@ void __init iommu_hole_init(void)
 
        fix = 0;
        for (num = 24; num < 32; num++) {               
-               char name[30];
-               if (read_pci_config(0, num, 3, 0x00) != NB_ID_3) 
-                       continue;       
+               if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00)))
+                       continue;
 
+               iommu_detected = 1;
                iommu_aperture = 1; 
 
                aper_order = (read_pci_config(0, num, 3, 0x90) >> 1) & 7; 
@@ -222,9 +220,7 @@ void __init iommu_hole_init(void)
                printk("CPU %d: aperture @ %Lx size %u MB\n", num-24, 
                       aper_base, aper_size>>20);
                
-               sprintf(name, "northbridge cpu %d", num-24); 
-
-               if (!aperture_valid(name, aper_base, aper_size)) { 
+               if (!aperture_valid(aper_base, aper_size)) {
                        fix = 1; 
                        break; 
                }
@@ -273,7 +269,7 @@ void __init iommu_hole_init(void)
 
        /* Fix up the north bridges */
        for (num = 24; num < 32; num++) {               
-               if (read_pci_config(0, num, 3, 0x00) != NB_ID_3) 
+               if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00)))
                        continue;       
 
                /* Don't enable translation yet. That is done later. 
index 29ef99001e0598d8ae184e8fd4b9806036ea1f79..b2ead91df218b0e68e9d1cb056981eb0e491896b 100644 (file)
@@ -100,7 +100,7 @@ void clear_local_APIC(void)
        maxlvt = get_maxlvt();
 
        /*
-        * Masking an LVT entry on a P6 can trigger a local APIC error
+        * Masking an LVT entry can trigger a local APIC error
         * if the vector is zero. Mask LVTERR first to prevent this.
         */
        if (maxlvt >= 3) {
@@ -851,7 +851,18 @@ void disable_APIC_timer(void)
                unsigned long v;
 
                v = apic_read(APIC_LVTT);
-               apic_write(APIC_LVTT, v | APIC_LVT_MASKED);
+               /*
+                * When an illegal vector value (0-15) is written to an LVT
+                * entry and delivery mode is Fixed, the APIC may signal an
+                * illegal vector error, without regard to whether the mask
+                * bit is set or whether an interrupt is actually seen on input.
+                *
+                * The boot sequence might call this function while the LVTT
+                * still has a '0' vector value, so make sure the vector field
+                * is set to a valid vector before masking.
+                */
+               v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
+               apic_write(APIC_LVTT, v);
        }
 }
 
@@ -909,15 +920,13 @@ int setup_profiling_timer(unsigned int multiplier)
        return -EINVAL;
 }
 
-#ifdef CONFIG_X86_MCE_AMD
-void setup_threshold_lvt(unsigned long lvt_off)
+void setup_APIC_extened_lvt(unsigned char lvt_off, unsigned char vector,
+                           unsigned char msg_type, unsigned char mask)
 {
-       unsigned int v = 0;
-       unsigned long reg = (lvt_off << 4) + 0x500;
-       v |= THRESHOLD_APIC_VECTOR;
+       unsigned long reg = (lvt_off << 4) + K8_APIC_EXT_LVT_BASE;
+       unsigned int  v   = (mask << 16) | (msg_type << 8) | vector;
        apic_write(reg, v);
 }
-#endif /* CONFIG_X86_MCE_AMD */
 
 #undef APIC_DIVISOR
 
@@ -983,7 +992,7 @@ void smp_apic_timer_interrupt(struct pt_regs *regs)
 }
 
 /*
- * oem_force_hpet_timer -- force HPET mode for some boxes.
+ * apic_is_clustered_box() -- Check if we can expect good TSC
  *
  * Thus far, the major user of this is IBM's Summit2 series:
  *
@@ -991,7 +1000,7 @@ void smp_apic_timer_interrupt(struct pt_regs *regs)
  * multi-chassis. Use available data to take a good guess.
  * If in doubt, go HPET.
  */
-__cpuinit int oem_force_hpet_timer(void)
+__cpuinit int apic_is_clustered_box(void)
 {
        int i, clusters, zeros;
        unsigned id;
@@ -1022,8 +1031,7 @@ __cpuinit int oem_force_hpet_timer(void)
        }
 
        /*
-        * If clusters > 2, then should be multi-chassis.  Return 1 for HPET.
-        * Else return 0 to use TSC.
+        * If clusters > 2, then it should be multi-chassis.
         * May have to revisit this when multi-core + hyperthreaded CPUs come
         * out, but AFAIK this will work even for them.
         */
index 4e6c3b729e39793b39c0bcaed3a10e0e1832b0ec..8ca04912b1cc6f8e44ec29b3a585d84d00653557 100644 (file)
@@ -111,14 +111,14 @@ static int crash_nmi_callback(struct pt_regs *regs, int cpu)
        atomic_dec(&waiting_for_crash_ipi);
        /* Assume hlt works */
        for(;;)
-               asm("hlt");
+               halt();
 
        return 1;
 }
 
 static void smp_send_nmi_allbutself(void)
 {
-       send_IPI_allbutself(APIC_DM_NMI);
+       send_IPI_allbutself(NMI_VECTOR);
 }
 
 /*
index 1ef6028f721ea843936b4994581c67dfa7e1b9c6..9e94d834624bbc9a0960ba9975ef6b62c389a501 100644 (file)
@@ -1,7 +1,6 @@
 /* 
  * Handle the memory map.
  * The functions here do the job until bootmem takes over.
- * $Id: e820.c,v 1.4 2002/09/19 19:25:32 ak Exp $
  *
  *  Getting sanitize_e820_map() in sync with i386 version by applying change:
  *  -  Provisions for empty E820 memory regions (reported by certain BIOSes).
@@ -621,6 +620,7 @@ void __init parse_memmapopt(char *p, char **from)
 }
 
 unsigned long pci_mem_start = 0xaeedbabe;
+EXPORT_SYMBOL(pci_mem_start);
 
 /*
  * Search for the biggest gap in the low 32 bits of the e820
index 586b34c00c489a2d3d495a904c00b158a1abfba4..7290e72b9a34d2a56be1a90e3c96989d9df1d844 100644 (file)
@@ -154,6 +154,7 @@ rff_trace:
        GET_THREAD_INFO(%rcx)   
        jmp rff_action
        CFI_ENDPROC
+END(ret_from_fork)
 
 /*
  * System call entry. Up to 6 arguments in registers are supported.
@@ -188,7 +189,7 @@ rff_trace:
 
 ENTRY(system_call)
        CFI_STARTPROC   simple
-       CFI_DEF_CFA     rsp,0
+       CFI_DEF_CFA     rsp,PDA_STACKOFFSET
        CFI_REGISTER    rip,rcx
        /*CFI_REGISTER  rflags,r11*/
        swapgs
@@ -285,6 +286,7 @@ tracesys:
        /* Use IRET because user could have changed frame */
        jmp int_ret_from_sys_call
        CFI_ENDPROC
+END(system_call)
                
 /* 
  * Syscall return path ending with IRET.
@@ -364,6 +366,7 @@ int_restore_rest:
        cli
        jmp int_with_check
        CFI_ENDPROC
+END(int_ret_from_sys_call)
                
 /* 
  * Certain special system calls that need to save a complete stack frame.
@@ -375,6 +378,7 @@ int_restore_rest:
        leaq    \func(%rip),%rax
        leaq    -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
        jmp     ptregscall_common
+END(\label)
        .endm
 
        CFI_STARTPROC
@@ -404,6 +408,7 @@ ENTRY(ptregscall_common)
        CFI_REL_OFFSET rip, 0
        ret
        CFI_ENDPROC
+END(ptregscall_common)
        
 ENTRY(stub_execve)
        CFI_STARTPROC
@@ -418,6 +423,7 @@ ENTRY(stub_execve)
        RESTORE_REST
        jmp int_ret_from_sys_call
        CFI_ENDPROC
+END(stub_execve)
        
 /*
  * sigreturn is special because it needs to restore all registers on return.
@@ -435,6 +441,7 @@ ENTRY(stub_rt_sigreturn)
        RESTORE_REST
        jmp int_ret_from_sys_call
        CFI_ENDPROC
+END(stub_rt_sigreturn)
 
 /*
  * initial frame state for interrupts and exceptions
@@ -466,29 +473,18 @@ ENTRY(stub_rt_sigreturn)
 /* 0(%rsp): interrupt number */ 
        .macro interrupt func
        cld
-#ifdef CONFIG_DEBUG_INFO
-       SAVE_ALL        
-       movq %rsp,%rdi
-       /*
-        * Setup a stack frame pointer.  This allows gdb to trace
-        * back to the original stack.
-        */
-       movq %rsp,%rbp
-       CFI_DEF_CFA_REGISTER    rbp
-#else          
        SAVE_ARGS
        leaq -ARGOFFSET(%rsp),%rdi      # arg1 for handler
-#endif 
+       pushq %rbp
+       CFI_ADJUST_CFA_OFFSET   8
+       CFI_REL_OFFSET          rbp, 0
+       movq %rsp,%rbp
+       CFI_DEF_CFA_REGISTER    rbp
        testl $3,CS(%rdi)
        je 1f
        swapgs  
 1:     incl    %gs:pda_irqcount        # RED-PEN should check preempt count
-       movq %gs:pda_irqstackptr,%rax
-       cmoveq %rax,%rsp /*todo This needs CFI annotation! */
-       pushq %rdi                      # save old stack        
-#ifndef CONFIG_DEBUG_INFO
-       CFI_ADJUST_CFA_OFFSET   8
-#endif
+       cmoveq %gs:pda_irqstackptr,%rsp
        call \func
        .endm
 
@@ -497,17 +493,11 @@ ENTRY(common_interrupt)
        interrupt do_IRQ
        /* 0(%rsp): oldrsp-ARGOFFSET */
 ret_from_intr:
-       popq  %rdi
-#ifndef CONFIG_DEBUG_INFO
-       CFI_ADJUST_CFA_OFFSET   -8
-#endif
        cli     
        decl %gs:pda_irqcount
-#ifdef CONFIG_DEBUG_INFO
-       movq RBP(%rdi),%rbp
+       leaveq
        CFI_DEF_CFA_REGISTER    rsp
-#endif
-       leaq ARGOFFSET(%rdi),%rsp /*todo This needs CFI annotation! */
+       CFI_ADJUST_CFA_OFFSET   -8
 exit_intr:
        GET_THREAD_INFO(%rcx)
        testl $3,CS-ARGOFFSET(%rsp)
@@ -589,7 +579,9 @@ retint_kernel:
        call preempt_schedule_irq
        jmp exit_intr
 #endif 
+
        CFI_ENDPROC
+END(common_interrupt)
        
 /*
  * APIC interrupts.
@@ -605,17 +597,21 @@ retint_kernel:
 
 ENTRY(thermal_interrupt)
        apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
+END(thermal_interrupt)
 
 ENTRY(threshold_interrupt)
        apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
+END(threshold_interrupt)
 
 #ifdef CONFIG_SMP      
 ENTRY(reschedule_interrupt)
        apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
+END(reschedule_interrupt)
 
        .macro INVALIDATE_ENTRY num
 ENTRY(invalidate_interrupt\num)
        apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt 
+END(invalidate_interrupt\num)
        .endm
 
        INVALIDATE_ENTRY 0
@@ -629,17 +625,21 @@ ENTRY(invalidate_interrupt\num)
 
 ENTRY(call_function_interrupt)
        apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
+END(call_function_interrupt)
 #endif
 
 #ifdef CONFIG_X86_LOCAL_APIC   
 ENTRY(apic_timer_interrupt)
        apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
+END(apic_timer_interrupt)
 
 ENTRY(error_interrupt)
        apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
+END(error_interrupt)
 
 ENTRY(spurious_interrupt)
        apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
+END(spurious_interrupt)
 #endif
                                
 /*
@@ -777,6 +777,7 @@ error_kernelspace:
        cmpq $gs_change,RIP(%rsp)
         je   error_swapgs
        jmp  error_sti
+END(error_entry)
        
        /* Reload gs selector with exception handling */
        /* edi:  new selector */ 
@@ -794,6 +795,7 @@ gs_change:
        CFI_ADJUST_CFA_OFFSET -8
         ret
        CFI_ENDPROC
+ENDPROC(load_gs_index)
        
         .section __ex_table,"a"
         .align 8
@@ -847,7 +849,7 @@ ENTRY(kernel_thread)
        UNFAKE_STACK_FRAME
        ret
        CFI_ENDPROC
-
+ENDPROC(kernel_thread)
        
 child_rip:
        /*
@@ -860,6 +862,7 @@ child_rip:
        # exit
        xorl %edi, %edi
        call do_exit
+ENDPROC(child_rip)
 
 /*
  * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
@@ -889,19 +892,24 @@ ENTRY(execve)
        UNFAKE_STACK_FRAME
        ret
        CFI_ENDPROC
+ENDPROC(execve)
 
 KPROBE_ENTRY(page_fault)
        errorentry do_page_fault
+END(page_fault)
        .previous .text
 
 ENTRY(coprocessor_error)
        zeroentry do_coprocessor_error
+END(coprocessor_error)
 
 ENTRY(simd_coprocessor_error)
        zeroentry do_simd_coprocessor_error     
+END(simd_coprocessor_error)
 
 ENTRY(device_not_available)
        zeroentry math_state_restore
+END(device_not_available)
 
        /* runs on exception stack */
 KPROBE_ENTRY(debug)
@@ -911,6 +919,7 @@ KPROBE_ENTRY(debug)
        paranoidentry do_debug, DEBUG_STACK
        jmp paranoid_exit
        CFI_ENDPROC
+END(debug)
        .previous .text
 
        /* runs on exception stack */   
@@ -961,6 +970,7 @@ paranoid_schedule:
        cli
        jmp paranoid_userspace
        CFI_ENDPROC
+END(nmi)
        .previous .text
 
 KPROBE_ENTRY(int3)
@@ -970,22 +980,28 @@ KPROBE_ENTRY(int3)
        paranoidentry do_int3, DEBUG_STACK
        jmp paranoid_exit
        CFI_ENDPROC
+END(int3)
        .previous .text
 
 ENTRY(overflow)
        zeroentry do_overflow
+END(overflow)
 
 ENTRY(bounds)
        zeroentry do_bounds
+END(bounds)
 
 ENTRY(invalid_op)
        zeroentry do_invalid_op 
+END(invalid_op)
 
 ENTRY(coprocessor_segment_overrun)
        zeroentry do_coprocessor_segment_overrun
+END(coprocessor_segment_overrun)
 
 ENTRY(reserved)
        zeroentry do_reserved
+END(reserved)
 
        /* runs on exception stack */
 ENTRY(double_fault)
@@ -993,12 +1009,15 @@ ENTRY(double_fault)
        paranoidentry do_double_fault
        jmp paranoid_exit
        CFI_ENDPROC
+END(double_fault)
 
 ENTRY(invalid_TSS)
        errorentry do_invalid_TSS
+END(invalid_TSS)
 
 ENTRY(segment_not_present)
        errorentry do_segment_not_present
+END(segment_not_present)
 
        /* runs on exception stack */
 ENTRY(stack_segment)
@@ -1006,19 +1025,24 @@ ENTRY(stack_segment)
        paranoidentry do_stack_segment
        jmp paranoid_exit
        CFI_ENDPROC
+END(stack_segment)
 
 KPROBE_ENTRY(general_protection)
        errorentry do_general_protection
+END(general_protection)
        .previous .text
 
 ENTRY(alignment_check)
        errorentry do_alignment_check
+END(alignment_check)
 
 ENTRY(divide_error)
        zeroentry do_divide_error
+END(divide_error)
 
 ENTRY(spurious_interrupt_bug)
        zeroentry do_spurious_interrupt_bug
+END(spurious_interrupt_bug)
 
 #ifdef CONFIG_X86_MCE
        /* runs on exception stack */
@@ -1029,6 +1053,7 @@ ENTRY(machine_check)
        paranoidentry do_machine_check
        jmp paranoid_exit
        CFI_ENDPROC
+END(machine_check)
 #endif
 
 ENTRY(call_softirq)
@@ -1046,3 +1071,37 @@ ENTRY(call_softirq)
        decl %gs:pda_irqcount
        ret
        CFI_ENDPROC
+ENDPROC(call_softirq)
+
+#ifdef CONFIG_STACK_UNWIND
+ENTRY(arch_unwind_init_running)
+       CFI_STARTPROC
+       movq    %r15, R15(%rdi)
+       movq    %r14, R14(%rdi)
+       xchgq   %rsi, %rdx
+       movq    %r13, R13(%rdi)
+       movq    %r12, R12(%rdi)
+       xorl    %eax, %eax
+       movq    %rbp, RBP(%rdi)
+       movq    %rbx, RBX(%rdi)
+       movq    (%rsp), %rcx
+       movq    %rax, R11(%rdi)
+       movq    %rax, R10(%rdi)
+       movq    %rax, R9(%rdi)
+       movq    %rax, R8(%rdi)
+       movq    %rax, RAX(%rdi)
+       movq    %rax, RCX(%rdi)
+       movq    %rax, RDX(%rdi)
+       movq    %rax, RSI(%rdi)
+       movq    %rax, RDI(%rdi)
+       movq    %rax, ORIG_RAX(%rdi)
+       movq    %rcx, RIP(%rdi)
+       leaq    8(%rsp), %rcx
+       movq    $__KERNEL_CS, CS(%rdi)
+       movq    %rax, EFLAGS(%rdi)
+       movq    %rcx, RSP(%rdi)
+       movq    $__KERNEL_DS, SS(%rdi)
+       jmpq    *%rdx
+       CFI_ENDPROC
+ENDPROC(arch_unwind_init_running)
+#endif
index 1a2ab825be98a3ef1dd28840f5ae16b8e6e49740..21c7066e236aa0a0e83ea78505037e51c37d37a8 100644 (file)
@@ -78,22 +78,29 @@ static void flat_send_IPI_mask(cpumask_t cpumask, int vector)
 
 static void flat_send_IPI_allbutself(int vector)
 {
-#ifndef CONFIG_HOTPLUG_CPU
-       if (((num_online_cpus()) - 1) >= 1)
-               __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL);
+#ifdef CONFIG_HOTPLUG_CPU
+       int hotplug = 1;
 #else
-       cpumask_t allbutme = cpu_online_map;
+       int hotplug = 0;
+#endif
+       if (hotplug || vector == NMI_VECTOR) {
+               cpumask_t allbutme = cpu_online_map;
 
-       cpu_clear(smp_processor_id(), allbutme);
+               cpu_clear(smp_processor_id(), allbutme);
 
-       if (!cpus_empty(allbutme))
-               flat_send_IPI_mask(allbutme, vector);
-#endif
+               if (!cpus_empty(allbutme))
+                       flat_send_IPI_mask(allbutme, vector);
+       } else if (num_online_cpus() > 1) {
+               __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL);
+       }
 }
 
 static void flat_send_IPI_all(int vector)
 {
-       __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
+       if (vector == NMI_VECTOR)
+               flat_send_IPI_mask(cpu_online_map, vector);
+       else
+               __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
 }
 
 static int flat_apic_id_registered(void)
@@ -108,10 +115,7 @@ static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask)
 
 static unsigned int phys_pkg_id(int index_msb)
 {
-       u32 ebx;
-
-       ebx = cpuid_ebx(1);
-       return ((ebx >> 24) & 0xFF) >> index_msb;
+       return hard_smp_processor_id() >> index_msb;
 }
 
 struct genapic apic_flat =  {
index cea20a66c150b38250477d870e170a059d1942a6..e6a71c9556d9561fe8d83a9473fddc63da6367ef 100644 (file)
@@ -2,8 +2,6 @@
  *  linux/arch/x86_64/kernel/head64.c -- prepare to run common code
  *
  *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
- *
- *  $Id: head64.c,v 1.22 2001/07/06 14:28:20 ak Exp $
  */
 
 #include <linux/init.h>
index 5ecd34ab8c2bba9ae70d15ff9803ead2957c72d9..9b1a4e1473215ba080a7d430562a9168d4660e43 100644 (file)
        BI(x,8) BI(x,9) BI(x,a) BI(x,b) \
        BI(x,c) BI(x,d) BI(x,e) BI(x,f)
 
-#define BUILD_14_IRQS(x) \
+#define BUILD_15_IRQS(x) \
        BI(x,0) BI(x,1) BI(x,2) BI(x,3) \
        BI(x,4) BI(x,5) BI(x,6) BI(x,7) \
        BI(x,8) BI(x,9) BI(x,a) BI(x,b) \
-       BI(x,c) BI(x,d)
+       BI(x,c) BI(x,d) BI(x,e)
 
 /*
  * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
@@ -73,13 +73,13 @@ BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb)
 BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd)
 
 #ifdef CONFIG_PCI_MSI
-       BUILD_14_IRQS(0xe)
+       BUILD_15_IRQS(0xe)
 #endif
 
 #endif
 
 #undef BUILD_16_IRQS
-#undef BUILD_14_IRQS
+#undef BUILD_15_IRQS
 #undef BI
 
 
@@ -92,11 +92,11 @@ BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd)
        IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
        IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
 
-#define IRQLIST_14(x) \
+#define IRQLIST_15(x) \
        IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \
        IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \
        IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
-       IRQ(x,c), IRQ(x,d)
+       IRQ(x,c), IRQ(x,d), IRQ(x,e)
 
 void (*interrupt[NR_IRQS])(void) = {
        IRQLIST_16(0x0),
@@ -108,7 +108,7 @@ void (*interrupt[NR_IRQS])(void) = {
        IRQLIST_16(0xc), IRQLIST_16(0xd)
 
 #ifdef CONFIG_PCI_MSI
-       , IRQLIST_14(0xe)
+       , IRQLIST_15(0xe)
 #endif
 
 #endif
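BUILD_14_IRQS becomes BUILD_15_IRQS, so the 0xe block of interrupt stubs now provides 15 entries (vectors 0xe0..0xee) instead of 14. A hedged compile-time check of the counting, using simplified stand-ins for the kernel macros:

#include <assert.h>

#define BI(x, y) (x + y),
#define BUILD_15(x) \
	BI(x, 0x0) BI(x, 0x1) BI(x, 0x2) BI(x, 0x3) \
	BI(x, 0x4) BI(x, 0x5) BI(x, 0x6) BI(x, 0x7) \
	BI(x, 0x8) BI(x, 0x9) BI(x, 0xa) BI(x, 0xb) \
	BI(x, 0xc) BI(x, 0xd) BI(x, 0xe)

static const int vectors_e0[] = { BUILD_15(0xe0) };

int main(void)
{
	assert(sizeof(vectors_e0) / sizeof(vectors_e0[0]) == 15);
	assert(vectors_e0[14] == 0xee);	/* last stub in the block */
	return 0;
}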
index 9cc7031b715178cb069243fffb1bdbbe49483e1e..c768d8a036d0c7998e802944f4cc0103e4ec0dbc 100644 (file)
@@ -41,6 +41,7 @@
 #include <asm/mach_apic.h>
 #include <asm/acpi.h>
 #include <asm/dma.h>
+#include <asm/nmi.h>
 
 #define __apicdebuginit  __init
 
@@ -56,6 +57,7 @@ int timer_over_8254 __initdata = 0;
 static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
 
 static DEFINE_SPINLOCK(ioapic_lock);
+static DEFINE_SPINLOCK(vector_lock);
 
 /*
  * # of IRQ routing registers
@@ -317,7 +319,7 @@ void __init check_ioapic(void)
                                vendor &= 0xffff;
                                switch (vendor) { 
                                case PCI_VENDOR_ID_VIA:
-#ifdef CONFIG_GART_IOMMU
+#ifdef CONFIG_IOMMU
                                        if ((end_pfn > MAX_DMA32_PFN ||
                                             force_iommu) &&
                                            !iommu_aperture_allowed) {
@@ -834,10 +836,17 @@ u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 };
 int assign_irq_vector(int irq)
 {
        static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
+       unsigned long flags;
+       int vector;
 
        BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
-       if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0)
+
+       spin_lock_irqsave(&vector_lock, flags);
+
+       if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) {
+               spin_unlock_irqrestore(&vector_lock, flags);
                return IO_APIC_VECTOR(irq);
+       }
 next:
        current_vector += 8;
        if (current_vector == IA32_SYSCALL_VECTOR)
@@ -849,11 +858,14 @@ next:
                current_vector = FIRST_DEVICE_VECTOR + offset;
        }
 
-       vector_irq[current_vector] = irq;
+       vector = current_vector;
+       vector_irq[vector] = irq;
        if (irq != AUTO_ASSIGN)
-               IO_APIC_VECTOR(irq) = current_vector;
+               IO_APIC_VECTOR(irq) = vector;
+
+       spin_unlock_irqrestore(&vector_lock, flags);
 
-       return current_vector;
+       return vector;
 }
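assign_irq_vector() is now serialized by vector_lock, so two CPUs can no longer hand out the same vector concurrently. A user-space sketch of the same allocate-under-lock pattern, with a pthread mutex standing in for the spinlock and deliberately simplified wrap logic:

#include <pthread.h>
#include <stdio.h>

#define FIRST_VECTOR 0x31	/* assumed bounds, not the kernel's exact ones */
#define LAST_VECTOR  0xee

static pthread_mutex_t vector_lock = PTHREAD_MUTEX_INITIALIZER;
static int current_vector = FIRST_VECTOR, offset;

static int alloc_vector(void)
{
	int v;

	pthread_mutex_lock(&vector_lock);
	current_vector += 8;	/* step by 8 to spread priority levels */
	if (current_vector > LAST_VECTOR) {
		offset = (offset + 1) % 8;
		current_vector = FIRST_VECTOR + offset;
	}
	v = current_vector;	/* snapshot the result under the lock */
	pthread_mutex_unlock(&vector_lock);
	return v;
}

int main(void)
{
	printf("%#x %#x\n", alloc_vector(), alloc_vector());
	return 0;
}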
 
 extern void (*interrupt[NR_IRQS])(void);
@@ -866,21 +878,14 @@ static struct hw_interrupt_type ioapic_edge_type;
 
 static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger)
 {
-       if (use_pci_vector() && !platform_legacy_irq(irq)) {
-               if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
-                               trigger == IOAPIC_LEVEL)
-                       irq_desc[vector].handler = &ioapic_level_type;
-               else
-                       irq_desc[vector].handler = &ioapic_edge_type;
-               set_intr_gate(vector, interrupt[vector]);
-       } else  {
-               if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
-                               trigger == IOAPIC_LEVEL)
-                       irq_desc[irq].handler = &ioapic_level_type;
-               else
-                       irq_desc[irq].handler = &ioapic_edge_type;
-               set_intr_gate(vector, interrupt[irq]);
-       }
+       unsigned idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq;
+
+       if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
+                       trigger == IOAPIC_LEVEL)
+               irq_desc[idx].handler = &ioapic_level_type;
+       else
+               irq_desc[idx].handler = &ioapic_edge_type;
+       set_intr_gate(vector, interrupt[idx]);
 }
 
 static void __init setup_IO_APIC_irqs(void)
index d8bd0b345b1e2494f936765aef9e6a95da5a72dd..59518d4d43589a9b99724ecefbd6b6162b43ac28 100644 (file)
@@ -26,6 +26,30 @@ atomic_t irq_mis_count;
 #endif
 #endif
 
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
+/*
+ * Probabilistic stack overflow check:
+ *
+ * Only check the process stack, because everything else runs on the
+ * big per-CPU interrupt stacks. Checking reliably on every event would
+ * be too expensive, so we only sample the stack on interrupt entry.
+ */
+static inline void stack_overflow_check(struct pt_regs *regs)
+{
+       u64 curbase = (u64) current->thread_info;
+       static unsigned long warned = -60*HZ;
+
+       if (regs->rsp >= curbase && regs->rsp <= curbase + THREAD_SIZE &&
+           regs->rsp <  curbase + sizeof(struct thread_info) + 128 &&
+           time_after(jiffies, warned + 60*HZ)) {
+               printk("do_IRQ: %s near stack overflow (cur:%Lx,rsp:%lx)\n",
+                      current->comm, curbase, regs->rsp);
+               show_stack(NULL,NULL);
+               warned = jiffies;
+       }
+}
+#endif
+
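The check above fires only when rsp has dropped into the first 128 bytes above thread_info, and at most once a minute thanks to the time_after() test against the warned timestamp. A rough user-space analogue, with time() standing in for jiffies:

#include <stdio.h>
#include <time.h>

static time_t warned;	/* last warning time, jiffies analogue */

static void stack_overflow_check(unsigned long base, unsigned long sp)
{
	time_t now = time(NULL);

	/* warn when sp is within 128 bytes of the stack base,
	   rate-limited to one message per 60 seconds */
	if (sp >= base && sp < base + 128 && now - warned >= 60) {
		fprintf(stderr, "near stack overflow: sp=%#lx\n", sp);
		warned = now;
	}
}

int main(void)
{
	stack_overflow_check(0x1000, 0x1040);	/* warns */
	stack_overflow_check(0x1000, 0x1040);	/* rate-limited, silent */
	return 0;
}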
 /*
  * Generic, controller-independent functions:
  */
@@ -39,7 +63,7 @@ int show_interrupts(struct seq_file *p, void *v)
        if (i == 0) {
                seq_printf(p, "           ");
                for_each_online_cpu(j)
-                       seq_printf(p, "CPU%d       ",j);
+                       seq_printf(p, "CPU%-8d",j);
                seq_putc(p, '\n');
        }
 
@@ -96,7 +120,9 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
 
        exit_idle();
        irq_enter();
-
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
+       stack_overflow_check(regs);
+#endif
        __do_IRQ(irq, regs);
        irq_exit();
 
diff --git a/arch/x86_64/kernel/k8.c b/arch/x86_64/kernel/k8.c
new file mode 100644 (file)
index 0000000..6416682
--- /dev/null
@@ -0,0 +1,118 @@
+/*
+ * Shared support code for AMD K8 northbridges and derivatives.
+ * Copyright 2006 Andi Kleen, SUSE Labs. Subject to GPLv2.
+ */
+#include <linux/gfp.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <asm/k8.h>
+
+int num_k8_northbridges;
+EXPORT_SYMBOL(num_k8_northbridges);
+
+static u32 *flush_words;
+
+struct pci_device_id k8_nb_ids[] = {
+       { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) },
+       { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) },
+       {}
+};
+EXPORT_SYMBOL(k8_nb_ids);
+
+struct pci_dev **k8_northbridges;
+EXPORT_SYMBOL(k8_northbridges);
+
+static struct pci_dev *next_k8_northbridge(struct pci_dev *dev)
+{
+       do {
+               dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev);
+               if (!dev)
+                       break;
+       } while (!pci_match_id(&k8_nb_ids[0], dev));
+       return dev;
+}
+
+int cache_k8_northbridges(void)
+{
+       int i;
+       struct pci_dev *dev;
+       if (num_k8_northbridges)
+               return 0;
+
+       num_k8_northbridges = 0;
+       dev = NULL;
+       while ((dev = next_k8_northbridge(dev)) != NULL)
+               num_k8_northbridges++;
+
+       k8_northbridges = kmalloc((num_k8_northbridges + 1) * sizeof(void *),
+                                 GFP_KERNEL);
+       if (!k8_northbridges)
+               return -ENOMEM;
+
+       flush_words = kmalloc(num_k8_northbridges * sizeof(u32), GFP_KERNEL);
+       if (!flush_words) {
+               kfree(k8_northbridges);
+               return -ENOMEM;
+       }
+
+       dev = NULL;
+       i = 0;
+       while ((dev = next_k8_northbridge(dev)) != NULL) {
+               k8_northbridges[i] = dev;
+               /* cache this bridge's flush word before advancing the index */
+               pci_read_config_dword(dev, 0x9c, &flush_words[i++]);
+       }
+       k8_northbridges[i] = NULL;
+       return 0;
+}
+EXPORT_SYMBOL_GPL(cache_k8_northbridges);
+
+/* Ignores subdevice/subvendor, but as far as I can tell
+   they're useless anyway. */
+int __init early_is_k8_nb(u32 device)
+{
+       struct pci_device_id *id;
+       u32 vendor = device & 0xffff;
+       device >>= 16;
+       for (id = k8_nb_ids; id->vendor; id++)
+               if (vendor == id->vendor && device == id->device)
+                       return 1;
+       return 0;
+}
+
+void k8_flush_garts(void)
+{
+       int flushed, i;
+       unsigned long flags;
+       static DEFINE_SPINLOCK(gart_lock);
+
+       /* Avoid races between AGP and IOMMU. In theory it's not needed,
+          but I'm not sure the hardware won't lose flush requests when
+          another one is pending. The whole operation is so expensive
+          anyway that a little extra serialization doesn't matter. -AK */
+       spin_lock_irqsave(&gart_lock, flags);
+       flushed = 0;
+       for (i = 0; i < num_k8_northbridges; i++) {
+               pci_write_config_dword(k8_northbridges[i], 0x9c,
+                                      flush_words[i]|1);
+               flushed++;
+       }
+       for (i = 0; i < num_k8_northbridges; i++) {
+               u32 w;
+               /* Make sure the hardware actually executed the flush */
+               for (;;) {
+                       pci_read_config_dword(k8_northbridges[i],
+                                             0x9c, &w);
+                       if (!(w & 1))
+                               break;
+                       cpu_relax();
+               }
+       }
+       spin_unlock_irqrestore(&gart_lock, flags);
+       if (!flushed)
+               printk("nothing to flush?\n");
+}
+EXPORT_SYMBOL_GPL(k8_flush_garts);
+
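k8_flush_garts() uses a two-phase pattern under one spinlock: first kick every northbridge by setting the flush bit, then poll each one until the hardware clears it. A toy user-space model of that trigger-then-wait structure, where the fake hw_tick() stands in for the hardware completing the flush:

#include <stdio.h>

#define NB 4

static unsigned int flush_reg[NB];	/* stands in for config reg 0x9c */

static void hw_tick(void)		/* pretend the devices finished */
{
	int i;

	for (i = 0; i < NB; i++)
		flush_reg[i] &= ~1u;
}

int main(void)
{
	int i;

	for (i = 0; i < NB; i++)	/* phase 1: trigger all flushes */
		flush_reg[i] |= 1;
	hw_tick();
	for (i = 0; i < NB; i++)	/* phase 2: wait for completion */
		while (flush_reg[i] & 1)
			;		/* cpu_relax() in the kernel */
	printf("all GARTs flushed\n");
	return 0;
}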
index c69fc43cee7b0e83870c13769a273e5b0a607413..acd5816b1a6f214d2dfc5253d674ed9d81492fe5 100644 (file)
@@ -562,7 +562,7 @@ static struct sysdev_class mce_sysclass = {
        set_kset_name("machinecheck"),
 };
 
-static DEFINE_PER_CPU(struct sys_device, device_mce);
+DEFINE_PER_CPU(struct sys_device, device_mce);
 
 /* Why are there no generic functions for this? */
 #define ACCESSOR(name, var, start) \
index d13b241ad094952985adf3f83f6a07e3343fad50..335200aa27379a269d8cd3ac1661b351d8cff92e 100644 (file)
@@ -1,5 +1,5 @@
 /*
- *  (c) 2005 Advanced Micro Devices, Inc.
+ *  (c) 2005, 2006 Advanced Micro Devices, Inc.
  *  Your use of this code is subject to the terms and conditions of the
  *  GNU general public license version 2. See "COPYING" or
  *  http://www.gnu.org/licenses/gpl.html
@@ -8,9 +8,10 @@
  *
  *  Support : jacob.shin@amd.com
  *
- *  MC4_MISC0 DRAM ECC Error Threshold available under AMD K8 Rev F.
- *  MC4_MISC0 exists per physical processor.
+ *  April 2006
+ *     - added support for AMD Family 0x10 processors
  *
+ *  All MC4_MISCi registers are shared between the cores of a multi-core package
  */
 
 #include <linux/cpu.h>
 #include <asm/percpu.h>
 #include <asm/idle.h>
 
-#define PFX "mce_threshold: "
-#define VERSION "version 1.00.9"
-#define NR_BANKS 5
-#define THRESHOLD_MAX 0xFFF
-#define INT_TYPE_APIC 0x00020000
-#define MASK_VALID_HI 0x80000000
-#define MASK_LVTOFF_HI 0x00F00000
-#define MASK_COUNT_EN_HI 0x00080000
-#define MASK_INT_TYPE_HI 0x00060000
-#define MASK_OVERFLOW_HI 0x00010000
+#define PFX               "mce_threshold: "
+#define VERSION           "version 1.1.1"
+#define NR_BANKS          6
+#define NR_BLOCKS         9
+#define THRESHOLD_MAX     0xFFF
+#define INT_TYPE_APIC     0x00020000
+#define MASK_VALID_HI     0x80000000
+#define MASK_LVTOFF_HI    0x00F00000
+#define MASK_COUNT_EN_HI  0x00080000
+#define MASK_INT_TYPE_HI  0x00060000
+#define MASK_OVERFLOW_HI  0x00010000
 #define MASK_ERR_COUNT_HI 0x00000FFF
-#define MASK_OVERFLOW 0x0001000000000000L
+#define MASK_BLKPTR_LO    0xFF000000
+#define MCG_XBLK_ADDR     0xC0000400
 
-struct threshold_bank {
+struct threshold_block {
+       unsigned int block;
+       unsigned int bank;
        unsigned int cpu;
-       u8 bank;
-       u8 interrupt_enable;
+       u32 address;
+       u16 interrupt_enable;
        u16 threshold_limit;
        struct kobject kobj;
+       struct list_head miscj;
 };
 
-static struct threshold_bank threshold_defaults = {
+/* defaults used early on boot */
+static struct threshold_block threshold_defaults = {
        .interrupt_enable = 0,
        .threshold_limit = THRESHOLD_MAX,
 };
 
+struct threshold_bank {
+       struct kobject kobj;
+       struct threshold_block *blocks;
+       cpumask_t cpus;
+};
+static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]);
+
 #ifdef CONFIG_SMP
 static unsigned char shared_bank[NR_BANKS] = {
        0, 0, 0, 0, 1
@@ -68,12 +82,12 @@ static DEFINE_PER_CPU(unsigned char, bank_map);     /* see which banks are on */
  */
 
 /* must be called with correct cpu affinity */
-static void threshold_restart_bank(struct threshold_bank *b,
+static void threshold_restart_bank(struct threshold_block *b,
                                   int reset, u16 old_limit)
 {
        u32 mci_misc_hi, mci_misc_lo;
 
-       rdmsr(MSR_IA32_MC0_MISC + b->bank * 4, mci_misc_lo, mci_misc_hi);
+       rdmsr(b->address, mci_misc_lo, mci_misc_hi);
 
        if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX))
                reset = 1;      /* limit cannot be lower than err count */
@@ -94,35 +108,57 @@ static void threshold_restart_bank(struct threshold_bank *b,
            (mci_misc_hi &= ~MASK_INT_TYPE_HI);
 
        mci_misc_hi |= MASK_COUNT_EN_HI;
-       wrmsr(MSR_IA32_MC0_MISC + b->bank * 4, mci_misc_lo, mci_misc_hi);
+       wrmsr(b->address, mci_misc_lo, mci_misc_hi);
 }
 
+/* cpu init entry point, called from mce.c with preempt off */
 void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
 {
-       int bank;
-       u32 mci_misc_lo, mci_misc_hi;
+       unsigned int bank, block;
        unsigned int cpu = smp_processor_id();
+       u32 low = 0, high = 0, address = 0;
 
        for (bank = 0; bank < NR_BANKS; ++bank) {
-               rdmsr(MSR_IA32_MC0_MISC + bank * 4, mci_misc_lo, mci_misc_hi);
+               for (block = 0; block < NR_BLOCKS; ++block) {
+                       if (block == 0)
+                               address = MSR_IA32_MC0_MISC + bank * 4;
+                       else if (block == 1)
+                               address = MCG_XBLK_ADDR
+                                       + ((low & MASK_BLKPTR_LO) >> 21);
+                       else
+                               ++address;
+
+                       if (rdmsr_safe(address, &low, &high))
+                               continue;
 
-               /* !valid, !counter present, bios locked */
-               if (!(mci_misc_hi & MASK_VALID_HI) ||
-                   !(mci_misc_hi & MASK_VALID_HI >> 1) ||
-                   (mci_misc_hi & MASK_VALID_HI >> 2))
-                       continue;
+                       if (!(high & MASK_VALID_HI)) {
+                               if (block)
+                                       continue;
+                               else
+                                       break;
+                       }
 
-               per_cpu(bank_map, cpu) |= (1 << bank);
+                       if (!(high & MASK_VALID_HI >> 1)  ||
+                            (high & MASK_VALID_HI >> 2))
+                               continue;
 
+                       if (!block)
+                               per_cpu(bank_map, cpu) |= (1 << bank);
 #ifdef CONFIG_SMP
-               if (shared_bank[bank] && cpu_core_id[cpu])
-                       continue;
+                       if (shared_bank[bank] && c->cpu_core_id)
+                               break;
 #endif
+                       high &= ~MASK_LVTOFF_HI;
+                       high |= K8_APIC_EXT_LVT_ENTRY_THRESHOLD << 20;
+                       wrmsr(address, low, high);
 
-               setup_threshold_lvt((mci_misc_hi & MASK_LVTOFF_HI) >> 20);
-               threshold_defaults.cpu = cpu;
-               threshold_defaults.bank = bank;
-               threshold_restart_bank(&threshold_defaults, 0, 0);
+                       setup_APIC_extened_lvt(K8_APIC_EXT_LVT_ENTRY_THRESHOLD,
+                                              THRESHOLD_APIC_VECTOR,
+                                              K8_APIC_EXT_INT_MSG_FIX, 0);
+
+                       threshold_defaults.address = address;
+                       threshold_restart_bank(&threshold_defaults, 0, 0);
+               }
        }
 }
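Both the init path above and the interrupt handler below walk the MISC blocks the same way: block 0 sits at MC<bank>_MISC, block 1 is reached through the BLKPTR field read from block 0, and further blocks occupy consecutive MSRs. A sketch of that address derivation, with the constants as assumed here:

#include <stdio.h>

#define MSR_IA32_MC0_MISC 0x403
#define MCG_XBLK_ADDR     0xC0000400u
#define MASK_BLKPTR_LO    0xFF000000u

static unsigned int next_block_address(unsigned int bank, unsigned int block,
				       unsigned int prev_addr,
				       unsigned int prev_low)
{
	if (block == 0)			/* base block lives at MCi_MISC */
		return MSR_IA32_MC0_MISC + bank * 4;
	if (block == 1)			/* follow BLKPTR out of block 0 */
		return MCG_XBLK_ADDR + ((prev_low & MASK_BLKPTR_LO) >> 21);
	return prev_addr + 1;		/* later blocks are consecutive MSRs */
}

int main(void)
{
	unsigned int a0 = next_block_address(4, 0, 0, 0);
	unsigned int a1 = next_block_address(4, 1, a0, 0x08000000);

	printf("%#x %#x %#x\n", a0, a1, next_block_address(4, 2, a1, 0));
	return 0;
}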
 
@@ -137,8 +173,9 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
  */
 asmlinkage void mce_threshold_interrupt(void)
 {
-       int bank;
+       unsigned int bank, block;
        struct mce m;
+       u32 low = 0, high = 0, address = 0;
 
        ack_APIC_irq();
        exit_idle();
@@ -150,15 +187,42 @@ asmlinkage void mce_threshold_interrupt(void)
 
        /* assume first bank caused it */
        for (bank = 0; bank < NR_BANKS; ++bank) {
-               m.bank = MCE_THRESHOLD_BASE + bank;
-               rdmsrl(MSR_IA32_MC0_MISC + bank * 4, m.misc);
+               for (block = 0; block < NR_BLOCKS; ++block) {
+                       if (block == 0)
+                               address = MSR_IA32_MC0_MISC + bank * 4;
+                       else if (block == 1)
+                               address = MCG_XBLK_ADDR
+                                       + ((low & MASK_BLKPTR_LO) >> 21);
+                       else
+                               ++address;
+
+                       if (rdmsr_safe(address, &low, &high))
+                               continue;
 
-               if (m.misc & MASK_OVERFLOW) {
-                       mce_log(&m);
-                       goto out;
+                       if (!(high & MASK_VALID_HI)) {
+                               if (block)
+                                       continue;
+                               else
+                                       break;
+                       }
+
+                       if (!(high & MASK_VALID_HI >> 1)  ||
+                            (high & MASK_VALID_HI >> 2))
+                               continue;
+
+                       if (high & MASK_OVERFLOW_HI) {
+                               rdmsrl(address, m.misc);
+                               rdmsrl(MSR_IA32_MC0_STATUS + bank * 4,
+                                      m.status);
+                               m.bank = K8_MCE_THRESHOLD_BASE
+                                      + bank * NR_BLOCKS
+                                      + block;
+                               mce_log(&m);
+                               goto out;
+                       }
                }
        }
-      out:
+out:
        irq_exit();
 }
 
@@ -166,20 +230,12 @@ asmlinkage void mce_threshold_interrupt(void)
  * Sysfs Interface
  */
 
-static struct sysdev_class threshold_sysclass = {
-       set_kset_name("threshold"),
-};
-
-static DEFINE_PER_CPU(struct sys_device, device_threshold);
-
 struct threshold_attr {
-        struct attribute attr;
-        ssize_t(*show) (struct threshold_bank *, char *);
-        ssize_t(*store) (struct threshold_bank *, const char *, size_t count);
+       struct attribute attr;
+       ssize_t(*show) (struct threshold_block *, char *);
+       ssize_t(*store) (struct threshold_block *, const char *, size_t count);
 };
 
-static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]);
-
 static cpumask_t affinity_set(unsigned int cpu)
 {
        cpumask_t oldmask = current->cpus_allowed;
@@ -194,15 +250,15 @@ static void affinity_restore(cpumask_t oldmask)
        set_cpus_allowed(current, oldmask);
 }
 
-#define SHOW_FIELDS(name) \
-        static ssize_t show_ ## name(struct threshold_bank * b, char *buf) \
-        { \
-                return sprintf(buf, "%lx\n", (unsigned long) b->name); \
-        }
+#define SHOW_FIELDS(name)                                           \
+static ssize_t show_ ## name(struct threshold_block * b, char *buf) \
+{                                                                   \
+        return sprintf(buf, "%lx\n", (unsigned long) b->name);      \
+}
 SHOW_FIELDS(interrupt_enable)
 SHOW_FIELDS(threshold_limit)
 
-static ssize_t store_interrupt_enable(struct threshold_bank *b,
+static ssize_t store_interrupt_enable(struct threshold_block *b,
                                      const char *buf, size_t count)
 {
        char *end;
@@ -219,7 +275,7 @@ static ssize_t store_interrupt_enable(struct threshold_bank *b,
        return end - buf;
 }
 
-static ssize_t store_threshold_limit(struct threshold_bank *b,
+static ssize_t store_threshold_limit(struct threshold_block *b,
                                     const char *buf, size_t count)
 {
        char *end;
@@ -242,18 +298,18 @@ static ssize_t store_threshold_limit(struct threshold_bank *b,
        return end - buf;
 }
 
-static ssize_t show_error_count(struct threshold_bank *b, char *buf)
+static ssize_t show_error_count(struct threshold_block *b, char *buf)
 {
        u32 high, low;
        cpumask_t oldmask;
        oldmask = affinity_set(b->cpu);
-       rdmsr(MSR_IA32_MC0_MISC + b->bank * 4, low, high); /* ignore low 32 */
+       rdmsr(b->address, low, high);
        affinity_restore(oldmask);
        return sprintf(buf, "%x\n",
                       (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit));
 }
 
-static ssize_t store_error_count(struct threshold_bank *b,
+static ssize_t store_error_count(struct threshold_block *b,
                                 const char *buf, size_t count)
 {
        cpumask_t oldmask;
@@ -269,13 +325,13 @@ static ssize_t store_error_count(struct threshold_bank *b,
         .store = _store,                                      \
 };
 
-#define ATTR_FIELDS(name) \
-        static struct threshold_attr name = \
+#define RW_ATTR(name)                                           \
+static struct threshold_attr name =                             \
         THRESHOLD_ATTR(name, 0644, show_## name, store_## name)
 
-ATTR_FIELDS(interrupt_enable);
-ATTR_FIELDS(threshold_limit);
-ATTR_FIELDS(error_count);
+RW_ATTR(interrupt_enable);
+RW_ATTR(threshold_limit);
+RW_ATTR(error_count);
 
 static struct attribute *default_attrs[] = {
        &interrupt_enable.attr,
@@ -284,12 +340,12 @@ static struct attribute *default_attrs[] = {
        NULL
 };
 
-#define to_bank(k) container_of(k,struct threshold_bank,kobj)
-#define to_attr(a) container_of(a,struct threshold_attr,attr)
+#define to_block(k) container_of(k, struct threshold_block, kobj)
+#define to_attr(a) container_of(a, struct threshold_attr, attr)
 
 static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
 {
-       struct threshold_bank *b = to_bank(kobj);
+       struct threshold_block *b = to_block(kobj);
        struct threshold_attr *a = to_attr(attr);
        ssize_t ret;
        ret = a->show ? a->show(b, buf) : -EIO;
@@ -299,7 +355,7 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
 static ssize_t store(struct kobject *kobj, struct attribute *attr,
                     const char *buf, size_t count)
 {
-       struct threshold_bank *b = to_bank(kobj);
+       struct threshold_block *b = to_block(kobj);
        struct threshold_attr *a = to_attr(attr);
        ssize_t ret;
        ret = a->store ? a->store(b, buf, count) : -EIO;
@@ -316,69 +372,174 @@ static struct kobj_type threshold_ktype = {
        .default_attrs = default_attrs,
 };
 
+static __cpuinit int allocate_threshold_blocks(unsigned int cpu,
+                                              unsigned int bank,
+                                              unsigned int block,
+                                              u32 address)
+{
+       int err;
+       u32 low, high;
+       struct threshold_block *b = NULL;
+
+       if ((bank >= NR_BANKS) || (block >= NR_BLOCKS))
+               return 0;
+
+       if (rdmsr_safe(address, &low, &high))
+               goto recurse;
+
+       if (!(high & MASK_VALID_HI)) {
+               if (block)
+                       goto recurse;
+               else
+                       return 0;
+       }
+
+       if (!(high & MASK_VALID_HI >> 1)  ||
+            (high & MASK_VALID_HI >> 2))
+               goto recurse;
+
+       b = kzalloc(sizeof(struct threshold_block), GFP_KERNEL);
+       if (!b)
+               return -ENOMEM;
+
+       b->block = block;
+       b->bank = bank;
+       b->cpu = cpu;
+       b->address = address;
+       b->interrupt_enable = 0;
+       b->threshold_limit = THRESHOLD_MAX;
+
+       INIT_LIST_HEAD(&b->miscj);
+
+       if (per_cpu(threshold_banks, cpu)[bank]->blocks)
+               list_add(&b->miscj,
+                        &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj);
+       else
+               per_cpu(threshold_banks, cpu)[bank]->blocks = b;
+
+       kobject_set_name(&b->kobj, "misc%i", block);
+       b->kobj.parent = &per_cpu(threshold_banks, cpu)[bank]->kobj;
+       b->kobj.ktype = &threshold_ktype;
+       err = kobject_register(&b->kobj);
+       if (err)
+               goto out_free;
+recurse:
+       if (!block) {
+               address = (low & MASK_BLKPTR_LO) >> 21;
+               if (!address)
+                       return 0;
+               address += MCG_XBLK_ADDR;
+       } else
+               ++address;
+
+       err = allocate_threshold_blocks(cpu, bank, ++block, address);
+       if (err)
+               goto out_free;
+
+       return err;
+
+out_free:
+       if (b) {
+               kobject_unregister(&b->kobj);
+               kfree(b);
+       }
+       return err;
+}
+
 /* symlinks sibling shared banks to first core.  first core owns dir/files. */
-static __cpuinit int threshold_create_bank(unsigned int cpu, int bank)
+static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 {
-       int err = 0;
+       int i, err = 0;
        struct threshold_bank *b = NULL;
+       cpumask_t oldmask = CPU_MASK_NONE;
+       char name[32];
+
+       sprintf(name, "threshold_bank%i", bank);
 
 #ifdef CONFIG_SMP
-       if (cpu_core_id[cpu] && shared_bank[bank]) {    /* symlink */
-               char name[16];
-               unsigned lcpu = first_cpu(cpu_core_map[cpu]);
-               if (cpu_core_id[lcpu])
-                       goto out;       /* first core not up yet */
+       if (cpu_data[cpu].cpu_core_id && shared_bank[bank]) {   /* symlink */
+               i = first_cpu(cpu_core_map[cpu]);
+
+               /* first core not up yet */
+               if (cpu_data[i].cpu_core_id)
+                       goto out;
+
+               /* already linked */
+               if (per_cpu(threshold_banks, cpu)[bank])
+                       goto out;
+
+               b = per_cpu(threshold_banks, i)[bank];
 
-               b = per_cpu(threshold_banks, lcpu)[bank];
                if (!b)
                        goto out;
-               sprintf(name, "bank%i", bank);
-               err = sysfs_create_link(&per_cpu(device_threshold, cpu).kobj,
+
+               err = sysfs_create_link(&per_cpu(device_mce, cpu).kobj,
                                        &b->kobj, name);
                if (err)
                        goto out;
+
+               b->cpus = cpu_core_map[cpu];
                per_cpu(threshold_banks, cpu)[bank] = b;
                goto out;
        }
 #endif
 
-       b = kmalloc(sizeof(struct threshold_bank), GFP_KERNEL);
+       b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL);
        if (!b) {
                err = -ENOMEM;
                goto out;
        }
-       memset(b, 0, sizeof(struct threshold_bank));
 
-       b->cpu = cpu;
-       b->bank = bank;
-       b->interrupt_enable = 0;
-       b->threshold_limit = THRESHOLD_MAX;
-       kobject_set_name(&b->kobj, "bank%i", bank);
-       b->kobj.parent = &per_cpu(device_threshold, cpu).kobj;
-       b->kobj.ktype = &threshold_ktype;
-
+       kobject_set_name(&b->kobj, "threshold_bank%i", bank);
+       b->kobj.parent = &per_cpu(device_mce, cpu).kobj;
+#ifndef CONFIG_SMP
+       b->cpus = CPU_MASK_ALL;
+#else
+       b->cpus = cpu_core_map[cpu];
+#endif
        err = kobject_register(&b->kobj);
-       if (err) {
-               kfree(b);
-               goto out;
-       }
+       if (err)
+               goto out_free;
+
        per_cpu(threshold_banks, cpu)[bank] = b;
-      out:
+
+       oldmask = affinity_set(cpu);
+       err = allocate_threshold_blocks(cpu, bank, 0,
+                                       MSR_IA32_MC0_MISC + bank * 4);
+       affinity_restore(oldmask);
+
+       if (err)
+               goto out_free;
+
+       for_each_cpu_mask(i, b->cpus) {
+               if (i == cpu)
+                       continue;
+
+               err = sysfs_create_link(&per_cpu(device_mce, i).kobj,
+                                       &b->kobj, name);
+               if (err)
+                       goto out;
+
+               per_cpu(threshold_banks, i)[bank] = b;
+       }
+
+       goto out;
+
+out_free:
+       per_cpu(threshold_banks, cpu)[bank] = NULL;
+       kfree(b);
+out:
        return err;
 }
 
 /* create dir/files for all valid threshold banks */
 static __cpuinit int threshold_create_device(unsigned int cpu)
 {
-       int bank;
+       unsigned int bank;
        int err = 0;
 
-       per_cpu(device_threshold, cpu).id = cpu;
-       per_cpu(device_threshold, cpu).cls = &threshold_sysclass;
-       err = sysdev_register(&per_cpu(device_threshold, cpu));
-       if (err)
-               goto out;
-
        for (bank = 0; bank < NR_BANKS; ++bank) {
                if (!(per_cpu(bank_map, cpu) & 1 << bank))
                        continue;
@@ -386,7 +547,7 @@ static __cpuinit int threshold_create_device(unsigned int cpu)
                if (err)
                        goto out;
        }
-      out:
+out:
        return err;
 }
 
@@ -397,92 +558,85 @@ static __cpuinit int threshold_create_device(unsigned int cpu)
  *   of shared sysfs dir/files, and rest of the cores will be symlinked to it.
  */
 
-/* cpu hotplug call removes all symlinks before first core dies */
+static __cpuinit void deallocate_threshold_block(unsigned int cpu,
+                                                unsigned int bank)
+{
+       struct threshold_block *pos = NULL;
+       struct threshold_block *tmp = NULL;
+       struct threshold_bank *head = per_cpu(threshold_banks, cpu)[bank];
+
+       if (!head)
+               return;
+
+       list_for_each_entry_safe(pos, tmp, &head->blocks->miscj, miscj) {
+               kobject_unregister(&pos->kobj);
+               list_del(&pos->miscj);
+               kfree(pos);
+       }
+
+       kfree(per_cpu(threshold_banks, cpu)[bank]->blocks);
+       per_cpu(threshold_banks, cpu)[bank]->blocks = NULL;
+}
+
 static __cpuinit void threshold_remove_bank(unsigned int cpu, int bank)
 {
+       int i = 0;
        struct threshold_bank *b;
-       char name[16];
+       char name[32];
 
        b = per_cpu(threshold_banks, cpu)[bank];
+
        if (!b)
                return;
-       if (shared_bank[bank] && atomic_read(&b->kobj.kref.refcount) > 2) {
-               sprintf(name, "bank%i", bank);
-               sysfs_remove_link(&per_cpu(device_threshold, cpu).kobj, name);
-               per_cpu(threshold_banks, cpu)[bank] = NULL;
-       } else {
-               kobject_unregister(&b->kobj);
-               kfree(per_cpu(threshold_banks, cpu)[bank]);
+
+       if (!b->blocks)
+               goto free_out;
+
+       sprintf(name, "threshold_bank%i", bank);
+
+       /* sibling symlink */
+       if (shared_bank[bank] && b->blocks->cpu != cpu) {
+               sysfs_remove_link(&per_cpu(device_mce, cpu).kobj, name);
+               per_cpu(threshold_banks, cpu)[bank] = NULL;
+               return;
+       }
+
+       /* remove all sibling symlinks before unregistering */
+       for_each_cpu_mask(i, b->cpus) {
+               if (i == cpu)
+                       continue;
+
+               sysfs_remove_link(&per_cpu(device_mce, i).kobj, name);
+               per_cpu(threshold_banks, i)[bank] = NULL;
        }
+
+       deallocate_threshold_block(cpu, bank);
+
+free_out:
+       kobject_unregister(&b->kobj);
+       kfree(b);
+       per_cpu(threshold_banks, cpu)[bank] = NULL;
 }
 
 static __cpuinit void threshold_remove_device(unsigned int cpu)
 {
-       int bank;
+       unsigned int bank;
 
        for (bank = 0; bank < NR_BANKS; ++bank) {
                if (!(per_cpu(bank_map, cpu) & 1 << bank))
                        continue;
                threshold_remove_bank(cpu, bank);
        }
-       sysdev_unregister(&per_cpu(device_threshold, cpu));
 }
 
-/* link all existing siblings when first core comes up */
-static __cpuinit int threshold_create_symlinks(unsigned int cpu)
-{
-       int bank, err = 0;
-       unsigned int lcpu = 0;
-
-       if (cpu_core_id[cpu])
-               return 0;
-       for_each_cpu_mask(lcpu, cpu_core_map[cpu]) {
-               if (lcpu == cpu)
-                       continue;
-               for (bank = 0; bank < NR_BANKS; ++bank) {
-                       if (!(per_cpu(bank_map, cpu) & 1 << bank))
-                               continue;
-                       if (!shared_bank[bank])
-                               continue;
-                       err = threshold_create_bank(lcpu, bank);
-               }
-       }
-       return err;
-}
-
-/* remove all symlinks before first core dies. */
-static __cpuinit void threshold_remove_symlinks(unsigned int cpu)
-{
-       int bank;
-       unsigned int lcpu = 0;
-       if (cpu_core_id[cpu])
-               return;
-       for_each_cpu_mask(lcpu, cpu_core_map[cpu]) {
-               if (lcpu == cpu)
-                       continue;
-               for (bank = 0; bank < NR_BANKS; ++bank) {
-                       if (!(per_cpu(bank_map, cpu) & 1 << bank))
-                               continue;
-                       if (!shared_bank[bank])
-                               continue;
-                       threshold_remove_bank(lcpu, bank);
-               }
-       }
-}
 #else /* !CONFIG_HOTPLUG_CPU */
-static __cpuinit void threshold_create_symlinks(unsigned int cpu)
-{
-}
-static __cpuinit void threshold_remove_symlinks(unsigned int cpu)
-{
-}
 static void threshold_remove_device(unsigned int cpu)
 {
 }
 #endif
 
 /* get notified when a cpu comes on/off */
-static int threshold_cpu_callback(struct notifier_block *nfb,
+static int __cpuinit threshold_cpu_callback(struct notifier_block *nfb,
                                            unsigned long action, void *hcpu)
 {
        /* cpu was unsigned int to begin with */
@@ -494,13 +648,6 @@ static int threshold_cpu_callback(struct notifier_block *nfb,
        switch (action) {
        case CPU_ONLINE:
                threshold_create_device(cpu);
-               threshold_create_symlinks(cpu);
-               break;
-       case CPU_DOWN_PREPARE:
-               threshold_remove_symlinks(cpu);
-               break;
-       case CPU_DOWN_FAILED:
-               threshold_create_symlinks(cpu);
                break;
        case CPU_DEAD:
                threshold_remove_device(cpu);
@@ -512,29 +659,22 @@ static int threshold_cpu_callback(struct notifier_block *nfb,
        return NOTIFY_OK;
 }
 
-static struct notifier_block threshold_cpu_notifier = {
+static struct notifier_block threshold_cpu_notifier __cpuinitdata = {
        .notifier_call = threshold_cpu_callback,
 };
 
 static __init int threshold_init_device(void)
 {
-       int err;
-       int lcpu = 0;
-
-       err = sysdev_class_register(&threshold_sysclass);
-       if (err)
-               goto out;
+       unsigned lcpu = 0;
 
        /* to hit CPUs online before the notifier is up */
        for_each_online_cpu(lcpu) {
-               err = threshold_create_device(lcpu);
+               int err = threshold_create_device(lcpu);
                if (err)
-                       goto out;
+                       return err;
        }
        register_cpu_notifier(&threshold_cpu_notifier);
-
-      out:
-       return err;
+       return 0;
 }
 
 device_initcall(threshold_init_device);
index bac195c74bccfce81033da6d473fefea1a2cabba..9d0958ff547f61a21066a3d4b2b3bd1373157209 100644 (file)
@@ -145,26 +145,38 @@ int apply_relocate(Elf_Shdr *sechdrs,
        return -ENOSYS;
 } 
 
-extern void apply_alternatives(void *start, void *end); 
-
 int module_finalize(const Elf_Ehdr *hdr,
-                   const Elf_Shdr *sechdrs,
-                   struct module *me)
+                    const Elf_Shdr *sechdrs,
+                    struct module *me)
 {
-       const Elf_Shdr *s;
+       const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL;
        char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
 
-       /* look for .altinstructions to patch */ 
-       for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { 
-               void *seg;              
-               if (strcmp(".altinstructions", secstrings + s->sh_name))
-                       continue;
-               seg = (void *)s->sh_addr; 
-               apply_alternatives(seg, seg + s->sh_size); 
-       }       
+       for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
+               if (!strcmp(".text", secstrings + s->sh_name))
+                       text = s;
+               if (!strcmp(".altinstructions", secstrings + s->sh_name))
+                       alt = s;
+               if (!strcmp(".smp_locks", secstrings + s->sh_name))
+               if (!strcmp(".smp_locks", secstrings + s->sh_name))
+                       locks = s;
+       }
+
+       if (alt) {
+               /* patch .altinstructions */
+               void *aseg = (void *)alt->sh_addr;
+               apply_alternatives(aseg, aseg + alt->sh_size);
+       }
+       if (locks && text) {
+               void *lseg = (void *)locks->sh_addr;
+               void *tseg = (void *)text->sh_addr;
+               alternatives_smp_module_add(me, me->name,
+                                           lseg, lseg + locks->sh_size,
+                                           tseg, tseg + text->sh_size);
+       }
        return 0;
 }
 
 void module_arch_cleanup(struct module *mod)
 {
+       alternatives_smp_module_del(mod);
 }
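module_finalize() now collects the three sections it needs in a single pass over the section-header table and only then patches alternatives and SMP lock prefixes. A hedged user-space sketch of the scan-by-name step, where struct shdr is a simplified stand-in for Elf_Shdr:

#include <stdio.h>
#include <string.h>

struct shdr { const char *name; };	/* stand-in for Elf_Shdr */

int main(void)
{
	struct shdr sections[] = {
		{ ".text" }, { ".data" }, { ".smp_locks" },
	};
	struct shdr *text = NULL, *locks = NULL;
	size_t i;

	/* one pass: remember interesting sections by name */
	for (i = 0; i < sizeof(sections) / sizeof(sections[0]); i++) {
		if (!strcmp(sections[i].name, ".text"))
			text = &sections[i];
		if (!strcmp(sections[i].name, ".smp_locks"))
			locks = &sections[i];
	}
	if (text && locks)	/* both needed to patch lock prefixes */
		printf("would patch SMP lock prefixes\n");
	return 0;
}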
index 4e6357fe0ec3e21a76e8afc9a38a4211342e899b..399489c93132c2b9c3d6954b9b2d7149ad084f63 100644 (file)
 #include <linux/config.h>
 #include <linux/mm.h>
 #include <linux/delay.h>
-#include <linux/bootmem.h>
-#include <linux/smp_lock.h>
 #include <linux/interrupt.h>
-#include <linux/mc146818rtc.h>
-#include <linux/kernel_stat.h>
 #include <linux/module.h>
 #include <linux/sysdev.h>
 #include <linux/nmi.h>
 #include <linux/kprobes.h>
 
 #include <asm/smp.h>
-#include <asm/mtrr.h>
-#include <asm/mpspec.h>
 #include <asm/nmi.h>
-#include <asm/msr.h>
 #include <asm/proto.h>
 #include <asm/kdebug.h>
-#include <asm/local.h>
 #include <asm/mce.h>
+#include <asm/intel_arch_perfmon.h>
 
 /*
  * lapic_nmi_owner tracks the ownership of the lapic NMI hardware:
@@ -74,6 +67,9 @@ static unsigned int nmi_p4_cccr_val;
 #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING   0x76
 #define K7_NMI_EVENT           K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
 
+#define ARCH_PERFMON_NMI_EVENT_SEL     ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
+#define ARCH_PERFMON_NMI_EVENT_UMASK   ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
+
 #define MSR_P4_MISC_ENABLE     0x1A0
 #define MSR_P4_MISC_ENABLE_PERF_AVAIL  (1<<7)
 #define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL        (1<<12)
@@ -105,7 +101,10 @@ static __cpuinit inline int nmi_known_cpu(void)
        case X86_VENDOR_AMD:
                return boot_cpu_data.x86 == 15;
        case X86_VENDOR_INTEL:
-               return boot_cpu_data.x86 == 15;
+               if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+                       return 1;
+               else
+                       return (boot_cpu_data.x86 == 15);
        }
        return 0;
 }
@@ -211,6 +210,8 @@ int __init setup_nmi_watchdog(char *str)
 
 __setup("nmi_watchdog=", setup_nmi_watchdog);
 
+static void disable_intel_arch_watchdog(void);
+
 static void disable_lapic_nmi_watchdog(void)
 {
        if (nmi_active <= 0)
@@ -223,6 +224,8 @@ static void disable_lapic_nmi_watchdog(void)
                if (boot_cpu_data.x86 == 15) {
                        wrmsr(MSR_P4_IQ_CCCR0, 0, 0);
                        wrmsr(MSR_P4_CRU_ESCR0, 0, 0);
+               } else if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+                       disable_intel_arch_watchdog();
                }
                break;
        }
@@ -375,6 +378,53 @@ static void setup_k7_watchdog(void)
        wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
 }
 
+static void disable_intel_arch_watchdog(void)
+{
+       unsigned ebx;
+
+       /*
+        * Check whether the Architectural PerfMon supports
+        * Unhalted Core Cycles Event or not.
+        * NOTE: Corresponding bit = 0 in ebx indicates the event is present.
+        */
+       ebx = cpuid_ebx(10);
+       if (!(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
+               wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, 0, 0);
+}
+
+static int setup_intel_arch_watchdog(void)
+{
+       unsigned int evntsel;
+       unsigned ebx;
+
+       /*
+        * Check whether the Architectural PerfMon supports
+        * Unhalted Core Cycles Event or not.
+        * NOTE: Corresponding bit = 0 in ebx indicates the event is present.
+        */
+       ebx = cpuid_ebx(10);
+       if ((ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
+               return 0;
+
+       nmi_perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
+
+       clear_msr_range(MSR_ARCH_PERFMON_EVENTSEL0, 2);
+       clear_msr_range(MSR_ARCH_PERFMON_PERFCTR0, 2);
+
+       evntsel = ARCH_PERFMON_EVENTSEL_INT
+               | ARCH_PERFMON_EVENTSEL_OS
+               | ARCH_PERFMON_EVENTSEL_USR
+               | ARCH_PERFMON_NMI_EVENT_SEL
+               | ARCH_PERFMON_NMI_EVENT_UMASK;
+
+       wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0);
+       wrmsrl(MSR_ARCH_PERFMON_PERFCTR0, -((u64)cpu_khz * 1000 / nmi_hz));
+       apic_write(APIC_LVTPC, APIC_DM_NMI);
+       evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+       wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0);
+       return 1;
+}
+
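setup_intel_arch_watchdog() arms the counter with a negative preload so that it counts up, overflows, and thus raises an NMI roughly nmi_hz times per second. The arithmetic, worked through with assumed clock values:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t cpu_khz = 2200000;	/* assume a 2.2 GHz CPU */
	unsigned int nmi_hz = 1;	/* one watchdog NMI per second */

	/* cpu_khz * 1000 = cycles per second; preload the counter with
	   the negated period so it overflows after exactly that many
	   unhalted core cycles */
	uint64_t preload = -(cpu_khz * 1000 / nmi_hz);

	printf("preload = %#llx\n", (unsigned long long)preload);
	return 0;
}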
 
 static int setup_p4_watchdog(void)
 {
@@ -428,10 +478,16 @@ void setup_apic_nmi_watchdog(void)
                setup_k7_watchdog();
                break;
        case X86_VENDOR_INTEL:
-               if (boot_cpu_data.x86 != 15)
-                       return;
-               if (!setup_p4_watchdog())
+               if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+                       if (!setup_intel_arch_watchdog())
+                               return;
+               } else if (boot_cpu_data.x86 == 15) {
+                       if (!setup_p4_watchdog())
+                               return;
+               } else {
                        return;
+               }
+
                break;
 
        default:
@@ -516,7 +572,14 @@ void __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
                         */
                        wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
                        apic_write(APIC_LVTPC, APIC_DM_NMI);
-               }
+               } else if (nmi_perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
+                       /*
+                        * For Intel based architectural perfmon
+                        * - LVTPC is masked on interrupt and must be
+                        *   unmasked by the LVTPC handler.
+                        */
+                       apic_write(APIC_LVTPC, APIC_DM_NMI);
+               }
                wrmsrl(nmi_perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz));
        }
 }
diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c
new file mode 100644 (file)
index 0000000..d91cb84
--- /dev/null
@@ -0,0 +1,1018 @@
+/*
+ * Derived from arch/powerpc/kernel/iommu.c
+ *
+ * Copyright (C) 2006 Jon Mason <jdmason@us.ibm.com>, IBM Corporation
+ * Copyright (C) 2006 Muli Ben-Yehuda <muli@il.ibm.com>, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/dma-mapping.h>
+#include <linux/init.h>
+#include <linux/bitops.h>
+#include <linux/pci_ids.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <asm/proto.h>
+#include <asm/calgary.h>
+#include <asm/tce.h>
+#include <asm/pci-direct.h>
+#include <asm/system.h>
+#include <asm/dma.h>
+
+#define PCI_DEVICE_ID_IBM_CALGARY 0x02a1
+#define PCI_VENDOR_DEVICE_ID_CALGARY \
+       (PCI_VENDOR_ID_IBM | PCI_DEVICE_ID_IBM_CALGARY << 16)
+
+/* we need these for register space address calculation */
+#define START_ADDRESS           0xfe000000
+#define CHASSIS_BASE            0
+#define ONE_BASED_CHASSIS_NUM   1
+
+/* register offsets inside the host bridge space */
+#define PHB_CSR_OFFSET         0x0110
+#define PHB_PLSSR_OFFSET       0x0120
+#define PHB_CONFIG_RW_OFFSET   0x0160
+#define PHB_IOBASE_BAR_LOW     0x0170
+#define PHB_IOBASE_BAR_HIGH    0x0180
+#define PHB_MEM_1_LOW          0x0190
+#define PHB_MEM_1_HIGH         0x01A0
+#define PHB_IO_ADDR_SIZE       0x01B0
+#define PHB_MEM_1_SIZE         0x01C0
+#define PHB_MEM_ST_OFFSET      0x01D0
+#define PHB_AER_OFFSET         0x0200
+#define PHB_CONFIG_0_HIGH      0x0220
+#define PHB_CONFIG_0_LOW       0x0230
+#define PHB_CONFIG_0_END       0x0240
+#define PHB_MEM_2_LOW          0x02B0
+#define PHB_MEM_2_HIGH         0x02C0
+#define PHB_MEM_2_SIZE_HIGH    0x02D0
+#define PHB_MEM_2_SIZE_LOW     0x02E0
+#define PHB_DOSHOLE_OFFSET     0x08E0
+
+/* PHB_CONFIG_RW */
+#define PHB_TCE_ENABLE         0x20000000
+#define PHB_SLOT_DISABLE       0x1C000000
+#define PHB_DAC_DISABLE                0x01000000
+#define PHB_MEM2_ENABLE                0x00400000
+#define PHB_MCSR_ENABLE                0x00100000
+/* TAR (Table Address Register) */
+#define TAR_SW_BITS            0x0000ffffffff800fUL
+#define TAR_VALID              0x0000000000000008UL
+/* CSR (Channel/DMA Status Register) */
+#define CSR_AGENT_MASK         0xffe0ffff
+
+#define MAX_NUM_OF_PHBS                8 /* how many PHBs in total? */
+#define MAX_PHB_BUS_NUM                (MAX_NUM_OF_PHBS * 2) /* max dev->bus->number */
+#define PHBS_PER_CALGARY       4
+
+/* register offsets in Calgary's internal register space */
+static const unsigned long tar_offsets[] = {
+       0x0580 /* TAR0 */,
+       0x0588 /* TAR1 */,
+       0x0590 /* TAR2 */,
+       0x0598 /* TAR3 */
+};
+
+static const unsigned long split_queue_offsets[] = {
+       0x4870 /* SPLIT QUEUE 0 */,
+       0x5870 /* SPLIT QUEUE 1 */,
+       0x6870 /* SPLIT QUEUE 2 */,
+       0x7870 /* SPLIT QUEUE 3 */
+};
+
+static const unsigned long phb_offsets[] = {
+       0x8000 /* PHB0 */,
+       0x9000 /* PHB1 */,
+       0xA000 /* PHB2 */,
+       0xB000 /* PHB3 */
+};
+
+void* tce_table_kva[MAX_NUM_OF_PHBS * MAX_NUMNODES];
+unsigned int specified_table_size = TCE_TABLE_SIZE_UNSPECIFIED;
+static int translate_empty_slots __read_mostly = 0;
+static int calgary_detected __read_mostly = 0;
+
+/*
+ * the bitmap of PHBs the user requested that we disable
+ * translation on.
+ */
+static DECLARE_BITMAP(translation_disabled, MAX_NUMNODES * MAX_PHB_BUS_NUM);
+
+static void tce_cache_blast(struct iommu_table *tbl);
+
+/* enable this to stress test the chip's TCE cache */
+#ifdef CONFIG_IOMMU_DEBUG
+static inline void tce_cache_blast_stress(struct iommu_table *tbl)
+{
+       tce_cache_blast(tbl);
+}
+#else
+static inline void tce_cache_blast_stress(struct iommu_table *tbl)
+{
+}
+#endif /* CONFIG_IOMMU_DEBUG */
+
+static inline unsigned int num_dma_pages(unsigned long dma, unsigned int dmalen)
+{
+       unsigned int npages;
+
+       npages = PAGE_ALIGN(dma + dmalen) - (dma & PAGE_MASK);
+       npages >>= PAGE_SHIFT;
+
+       return npages;
+}
+
+static inline int translate_phb(struct pci_dev* dev)
+{
+       int disabled = test_bit(dev->bus->number, translation_disabled);
+       return !disabled;
+}
+
+static void iommu_range_reserve(struct iommu_table *tbl,
+        unsigned long start_addr, unsigned int npages)
+{
+       unsigned long index;
+       unsigned long end;
+
+       index = start_addr >> PAGE_SHIFT;
+
+       /* bail out if we're asked to reserve a region we don't cover */
+       if (index >= tbl->it_size)
+               return;
+
+       end = index + npages;
+       if (end > tbl->it_size) /* don't go off the table */
+               end = tbl->it_size;
+
+       while (index < end) {
+               if (test_bit(index, tbl->it_map))
+                       printk(KERN_ERR "Calgary: entry already allocated at "
+                              "0x%lx tbl %p dma 0x%lx npages %u\n",
+                              index, tbl, start_addr, npages);
+               ++index;
+       }
+       /* only mark the clamped range so we never touch bits past the table */
+       set_bit_string(tbl->it_map, start_addr >> PAGE_SHIFT,
+                      end - (start_addr >> PAGE_SHIFT));
+}
+
+static unsigned long iommu_range_alloc(struct iommu_table *tbl,
+       unsigned int npages)
+{
+       unsigned long offset;
+
+       BUG_ON(npages == 0);
+
+       offset = find_next_zero_string(tbl->it_map, tbl->it_hint,
+                                      tbl->it_size, npages);
+       if (offset == ~0UL) {
+               tce_cache_blast(tbl);
+               offset = find_next_zero_string(tbl->it_map, 0,
+                                              tbl->it_size, npages);
+               if (offset == ~0UL) {
+                       printk(KERN_WARNING "Calgary: IOMMU full.\n");
+                       if (panic_on_overflow)
+                               panic("Calgary: fix the allocator.\n");
+                       else
+                               return bad_dma_address;
+               }
+       }
+
+       set_bit_string(tbl->it_map, offset, npages);
+       tbl->it_hint = offset + npages;
+       BUG_ON(tbl->it_hint > tbl->it_size);
+
+       return offset;
+}
+
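iommu_range_alloc() searches from the rotating hint first and only after blasting the TCE cache rescans from bit 0. A minimal model of that two-pass search, where find_zero_run is a deliberately simplified stand-in for find_next_zero_string:

#include <stdio.h>

#define TABLE_SIZE 16

static int find_zero_run(const int *map, int start, int npages)
{
	int i, j;

	for (i = start; i + npages <= TABLE_SIZE; i++) {
		for (j = 0; j < npages; j++)
			if (map[i + j])
				break;
		if (j == npages)
			return i;
	}
	return -1;	/* stands in for the ~0UL failure value */
}

int main(void)
{
	int map[TABLE_SIZE] = { [12] = 1, [13] = 1, [14] = 1, [15] = 1 };
	int hint = 10, off;

	off = find_zero_run(map, hint, 4);	/* pass 1: from the hint */
	if (off < 0) {
		/* tce_cache_blast() would run here */
		off = find_zero_run(map, 0, 4);	/* pass 2: from the start */
	}
	printf("allocated at %d\n", off);	/* prints 0 */
	return 0;
}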
+static dma_addr_t iommu_alloc(struct iommu_table *tbl, void *vaddr,
+       unsigned int npages, int direction)
+{
+       unsigned long entry, flags;
+       dma_addr_t ret = bad_dma_address;
+
+       spin_lock_irqsave(&tbl->it_lock, flags);
+
+       entry = iommu_range_alloc(tbl, npages);
+
+       if (unlikely(entry == bad_dma_address))
+               goto error;
+
+       /* set the return dma address */
+       ret = (entry << PAGE_SHIFT) | ((unsigned long)vaddr & ~PAGE_MASK);
+
+       /* put the TCEs in the HW table */
+       tce_build(tbl, entry, npages, (unsigned long)vaddr & PAGE_MASK,
+                 direction);
+
+       spin_unlock_irqrestore(&tbl->it_lock, flags);
+
+       return ret;
+
+error:
+       spin_unlock_irqrestore(&tbl->it_lock, flags);
+       printk(KERN_WARNING "Calgary: failed to allocate %u pages in "
+              "iommu %p\n", npages, tbl);
+       return bad_dma_address;
+}
+
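The bus address iommu_alloc() returns is pure bit arithmetic: the allocated TCE entry index selects the IO page, and the low bits of the CPU virtual address supply the offset within that page. A worked example with hypothetical values:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12
#define PAGE_MASK  (~((1ul << PAGE_SHIFT) - 1))

int main(void)
{
	unsigned long entry = 0x42;		/* hypothetical TCE slot */
	unsigned long vaddr = 0x7f0012345678;	/* hypothetical CPU address */
	uint64_t dma = (entry << PAGE_SHIFT) | (vaddr & ~PAGE_MASK);

	/* 0x42 << 12 = 0x42000, in-page offset 0x678 -> dma 0x42678 */
	printf("dma=%#llx\n", (unsigned long long)dma);
	return 0;
}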
+static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
+       unsigned int npages)
+{
+       unsigned long entry;
+       unsigned long i;
+
+       entry = dma_addr >> PAGE_SHIFT;
+
+       BUG_ON(entry + npages > tbl->it_size);
+
+       tce_free(tbl, entry, npages);
+
+       for (i = 0; i < npages; ++i) {
+               if (!test_bit(entry + i, tbl->it_map))
+                       printk(KERN_ERR "Calgary: bit is off at 0x%lx "
+                              "tbl %p dma 0x%Lx entry 0x%lx npages %u\n",
+                              entry + i, tbl, dma_addr, entry, npages);
+       }
+
+       __clear_bit_string(tbl->it_map, entry, npages);
+
+       tce_cache_blast_stress(tbl);
+}
+
+static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
+       unsigned int npages)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&tbl->it_lock, flags);
+
+       __iommu_free(tbl, dma_addr, npages);
+
+       spin_unlock_irqrestore(&tbl->it_lock, flags);
+}
+
+static void __calgary_unmap_sg(struct iommu_table *tbl,
+       struct scatterlist *sglist, int nelems, int direction)
+{
+       while (nelems--) {
+               unsigned int npages;
+               dma_addr_t dma = sglist->dma_address;
+               unsigned int dmalen = sglist->dma_length;
+
+               if (dmalen == 0)
+                       break;
+
+               npages = num_dma_pages(dma, dmalen);
+               __iommu_free(tbl, dma, npages);
+               sglist++;
+       }
+}
+
+void calgary_unmap_sg(struct device *dev, struct scatterlist *sglist,
+                     int nelems, int direction)
+{
+       unsigned long flags;
+       struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata;
+
+       if (!translate_phb(to_pci_dev(dev)))
+               return;
+
+       spin_lock_irqsave(&tbl->it_lock, flags);
+
+       __calgary_unmap_sg(tbl, sglist, nelems, direction);
+
+       spin_unlock_irqrestore(&tbl->it_lock, flags);
+}
+
+static int calgary_nontranslate_map_sg(struct device* dev,
+       struct scatterlist *sg, int nelems, int direction)
+{
+       int i;
+
+       for (i = 0; i < nelems; i++) {
+               struct scatterlist *s = &sg[i];
+               BUG_ON(!s->page);
+               s->dma_address = virt_to_bus(page_address(s->page) + s->offset);
+               s->dma_length = s->length;
+       }
+       return nelems;
+}
+
+int calgary_map_sg(struct device *dev, struct scatterlist *sg,
+       int nelems, int direction)
+{
+       struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata;
+       unsigned long flags;
+       unsigned long vaddr;
+       unsigned int npages;
+       unsigned long entry;
+       int i;
+
+       if (!translate_phb(to_pci_dev(dev)))
+               return calgary_nontranslate_map_sg(dev, sg, nelems, direction);
+
+       spin_lock_irqsave(&tbl->it_lock, flags);
+
+       for (i = 0; i < nelems; i++) {
+               struct scatterlist *s = &sg[i];
+               BUG_ON(!s->page);
+
+               vaddr = (unsigned long)page_address(s->page) + s->offset;
+               npages = num_dma_pages(vaddr, s->length);
+
+               entry = iommu_range_alloc(tbl, npages);
+               if (entry == bad_dma_address) {
+                       /* makes sure unmap knows to stop */
+                       s->dma_length = 0;
+                       goto error;
+               }
+
+               s->dma_address = (entry << PAGE_SHIFT) | s->offset;
+
+               /* insert into HW table */
+               tce_build(tbl, entry, npages, vaddr & PAGE_MASK,
+                         direction);
+
+               s->dma_length = s->length;
+       }
+
+       spin_unlock_irqrestore(&tbl->it_lock, flags);
+
+       return nelems;
+error:
+       __calgary_unmap_sg(tbl, sg, nelems, direction);
+       for (i = 0; i < nelems; i++) {
+               sg[i].dma_address = bad_dma_address;
+               sg[i].dma_length = 0;
+       }
+       spin_unlock_irqrestore(&tbl->it_lock, flags);
+       return 0;
+}
+
+dma_addr_t calgary_map_single(struct device *dev, void *vaddr,
+       size_t size, int direction)
+{
+       dma_addr_t dma_handle = bad_dma_address;
+       unsigned long uaddr;
+       unsigned int npages;
+       struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata;
+
+       uaddr = (unsigned long)vaddr;
+       npages = num_dma_pages(uaddr, size);
+
+       if (translate_phb(to_pci_dev(dev)))
+               dma_handle = iommu_alloc(tbl, vaddr, npages, direction);
+       else
+               dma_handle = virt_to_bus(vaddr);
+
+       return dma_handle;
+}
+
+void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle,
+       size_t size, int direction)
+{
+       struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata;
+       unsigned int npages;
+
+       if (!translate_phb(to_pci_dev(dev)))
+               return;
+
+       npages = num_dma_pages(dma_handle, size);
+       iommu_free(tbl, dma_handle, npages);
+}
+
+void *calgary_alloc_coherent(struct device *dev, size_t size,
+       dma_addr_t *dma_handle, gfp_t flag)
+{
+       void *ret = NULL;
+       dma_addr_t mapping;
+       unsigned int npages, order;
+       struct iommu_table *tbl;
+
+       tbl = to_pci_dev(dev)->bus->self->sysdata;
+
+       size = PAGE_ALIGN(size); /* size rounded up to full pages */
+       npages = size >> PAGE_SHIFT;
+       order = get_order(size);
+
+       /* alloc enough pages (and possibly more) */
+       ret = (void *)__get_free_pages(flag, order);
+       if (!ret)
+               goto error;
+       memset(ret, 0, size);
+
+       if (translate_phb(to_pci_dev(dev))) {
+               /* set up tces to cover the allocated range */
+               mapping = iommu_alloc(tbl, ret, npages, DMA_BIDIRECTIONAL);
+               if (mapping == bad_dma_address)
+                       goto free;
+
+               *dma_handle = mapping;
+       } else /* non translated slot */
+               *dma_handle = virt_to_bus(ret);
+
+       return ret;
+
+free:
+       free_pages((unsigned long)ret, get_order(size));
+       ret = NULL;
+error:
+       return ret;
+}
+
+static struct dma_mapping_ops calgary_dma_ops = {
+       .alloc_coherent = calgary_alloc_coherent,
+       .map_single = calgary_map_single,
+       .unmap_single = calgary_unmap_single,
+       .map_sg = calgary_map_sg,
+       .unmap_sg = calgary_unmap_sg,
+};
+
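+/*
+ * Each Calgary chip implements several PHBs (PHBS_PER_CALGARY); the
+ * helpers below map a PCI bus number to the per-chip PHB index and to
+ * that PHB's register offsets.
+ */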
+static inline int busno_to_phbid(unsigned char num)
+{
+       return bus_to_phb(num) % PHBS_PER_CALGARY;
+}
+
+static inline unsigned long split_queue_offset(unsigned char num)
+{
+       size_t idx = busno_to_phbid(num);
+
+       return split_queue_offsets[idx];
+}
+
+static inline unsigned long tar_offset(unsigned char num)
+{
+       size_t idx = busno_to_phbid(num);
+
+       return tar_offsets[idx];
+}
+
+static inline unsigned long phb_offset(unsigned char num)
+{
+       size_t idx = busno_to_phbid(num);
+
+       return phb_offsets[idx];
+}
+
+static inline void __iomem *calgary_reg(void __iomem *bar, unsigned long offset)
+{
+       unsigned long target = ((unsigned long)bar) | offset;
+       return (void __iomem *)target;
+}
+
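+/*
+ * Flush the TCE cache for this PHB: disable bus arbitration to quiesce
+ * DMA, wait for the split queues to drain, rewrite the TAR to
+ * invalidate the cached TCEs, then re-enable arbitration.
+ */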
+static void tce_cache_blast(struct iommu_table *tbl)
+{
+       u64 val;
+       u32 aer;
+       int i = 0;
+       void __iomem *bbar = tbl->bbar;
+       void __iomem *target;
+
+       /* disable arbitration on the bus */
+       target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_AER_OFFSET);
+       aer = readl(target);
+       writel(0, target);
+
+       /* read plssr to ensure it got there */
+       target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_PLSSR_OFFSET);
+       val = readl(target);
+
+       /* poll split queues until all DMA activity is done */
+       target = calgary_reg(bbar, split_queue_offset(tbl->it_busno));
+       do {
+               val = readq(target);
+               i++;
+       } while ((val & 0xff) != 0xff && i < 100);
+       if ((val & 0xff) != 0xff)
+               printk(KERN_WARNING "Calgary: PCI bus not quiesced, "
+                      "continuing anyway\n");
+
+       /* invalidate TCE cache */
+       target = calgary_reg(bbar, tar_offset(tbl->it_busno));
+       writeq(tbl->tar_val, target);
+
+       /* enable arbitration */
+       target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_AER_OFFSET);
+       writel(aer, target);
+       (void)readl(target); /* flush */
+}
+
+static void __init calgary_reserve_mem_region(struct pci_dev *dev, u64 start,
+       u64 limit)
+{
+       unsigned int numpages;
+
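+       /* the limit is inclusive; round it up to the next 1MB boundary */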
+       limit = limit | 0xfffff;
+       limit++;
+
+       numpages = ((limit - start) >> PAGE_SHIFT);
+       iommu_range_reserve(dev->sysdata, start, numpages);
+}
+
+static void __init calgary_reserve_peripheral_mem_1(struct pci_dev *dev)
+{
+       void __iomem *target;
+       u64 low, high, sizelow;
+       u64 start, limit;
+       struct iommu_table *tbl = dev->sysdata;
+       unsigned char busnum = dev->bus->number;
+       void __iomem *bbar = tbl->bbar;
+
+       /* peripheral MEM_1 region */
+       target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_1_LOW);
+       low = be32_to_cpu(readl(target));
+       target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_1_HIGH);
+       high = be32_to_cpu(readl(target));
+       target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_1_SIZE);
+       sizelow = be32_to_cpu(readl(target));
+
+       start = (high << 32) | low;
+       limit = sizelow;
+
+       calgary_reserve_mem_region(dev, start, limit);
+}
+
+static void __init calgary_reserve_peripheral_mem_2(struct pci_dev *dev)
+{
+       void __iomem *target;
+       u32 val32;
+       u64 low, high, sizelow, sizehigh;
+       u64 start, limit;
+       struct iommu_table *tbl = dev->sysdata;
+       unsigned char busnum = dev->bus->number;
+       void __iomem *bbar = tbl->bbar;
+
+       /* is it enabled? */
+       target = calgary_reg(bbar, phb_offset(busnum) | PHB_CONFIG_RW_OFFSET);
+       val32 = be32_to_cpu(readl(target));
+       if (!(val32 & PHB_MEM2_ENABLE))
+               return;
+
+       target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_LOW);
+       low = be32_to_cpu(readl(target));
+       target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_HIGH);
+       high = be32_to_cpu(readl(target));
+       target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_SIZE_LOW);
+       sizelow = be32_to_cpu(readl(target));
+       target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_SIZE_HIGH);
+       sizehigh = be32_to_cpu(readl(target));
+
+       start = (high << 32) | low;
+       limit = (sizehigh << 32) | sizelow;
+
+       calgary_reserve_mem_region(dev, start, limit);
+}
+
+/*
+ * some regions of the IO address space do not get translated, so we
+ * must not give devices IO addresses in those regions. The regions
+ * are the 640KB-1MB region and the two PCI peripheral memory holes.
+ * Reserve all of them in the IOMMU bitmap to avoid giving them out
+ * later.
+ */
+static void __init calgary_reserve_regions(struct pci_dev *dev)
+{
+       unsigned int npages;
+       void __iomem *bbar;
+       unsigned char busnum;
+       u64 start;
+       struct iommu_table *tbl = dev->sysdata;
+
+       bbar = tbl->bbar;
+       busnum = dev->bus->number;
+
+       /* reserve bad_dma_address in case it's a legal address */
+       iommu_range_reserve(tbl, bad_dma_address, 1);
+
+       /* avoid the BIOS/VGA first 640KB-1MB region */
+       start = (640 * 1024);
+       npages = ((1024 - 640) * 1024) >> PAGE_SHIFT;
+       iommu_range_reserve(tbl, start, npages);
+
+       /* reserve the two PCI peripheral memory regions in IO space */
+       calgary_reserve_peripheral_mem_1(dev);
+       calgary_reserve_peripheral_mem_2(dev);
+}
+
+static int __init calgary_setup_tar(struct pci_dev *dev, void __iomem *bbar)
+{
+       u64 val64;
+       u64 table_phys;
+       void __iomem *target;
+       int ret;
+       struct iommu_table *tbl;
+
+       /* build TCE tables for each PHB */
+       ret = build_tce_table(dev, bbar);
+       if (ret)
+               return ret;
+
+       calgary_reserve_regions(dev);
+
+       /* set TARs for each PHB */
+       target = calgary_reg(bbar, tar_offset(dev->bus->number));
+       val64 = be64_to_cpu(readq(target));
+
+       /* zero out all TAR bits under sw control */
+       val64 &= ~TAR_SW_BITS;
+
+       tbl = dev->sysdata;
+       table_phys = (u64)__pa(tbl->it_base);
+       val64 |= table_phys;
+
+       BUG_ON(specified_table_size > TCE_TABLE_SIZE_8M);
+       val64 |= (u64) specified_table_size;
+
+       tbl->tar_val = cpu_to_be64(val64);
+       writeq(tbl->tar_val, target);
+       readq(target); /* flush */
+
+       return 0;
+}
+
+static void __init calgary_free_tar(struct pci_dev *dev)
+{
+       u64 val64;
+       struct iommu_table *tbl = dev->sysdata;
+       void __iomem *target;
+
+       target = calgary_reg(tbl->bbar, tar_offset(dev->bus->number));
+       val64 = be64_to_cpu(readq(target));
+       val64 &= ~TAR_SW_BITS;
+       writeq(cpu_to_be64(val64), target);
+       readq(target); /* flush */
+
+       kfree(tbl);
+       dev->sysdata = NULL;
+}
+
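+/*
+ * Periodic watchdog: if the PHB's CSR reports a DMA error, log it and
+ * fence off the offending bus; otherwise re-arm the timer.
+ */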
+static void calgary_watchdog(unsigned long data)
+{
+       struct pci_dev *dev = (struct pci_dev *)data;
+       struct iommu_table *tbl = dev->sysdata;
+       void __iomem *bbar = tbl->bbar;
+       u32 val32;
+       void __iomem *target;
+
+       target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_CSR_OFFSET);
+       val32 = be32_to_cpu(readl(target));
+
+       /* If no error, the agent ID in the CSR is not valid */
+       if (val32 & CSR_AGENT_MASK) {
+               printk(KERN_EMERG "calgary_watchdog: DMA error on bus %d, "
+                                 "CSR = %#x\n", dev->bus->number, val32);
+               writel(0, target);
+
+               /* Disable bus that caused the error */
+               target = calgary_reg(bbar, phb_offset(tbl->it_busno) |
+                                          PHB_CONFIG_RW_OFFSET);
+               val32 = be32_to_cpu(readl(target));
+               val32 |= PHB_SLOT_DISABLE;
+               writel(cpu_to_be32(val32), target);
+               readl(target); /* flush */
+       } else {
+               /* Reset the timer */
+               mod_timer(&tbl->watchdog_timer, jiffies + 2 * HZ);
+       }
+}
+
+static void __init calgary_enable_translation(struct pci_dev *dev)
+{
+       u32 val32;
+       unsigned char busnum;
+       void __iomem *target;
+       void __iomem *bbar;
+       struct iommu_table *tbl;
+
+       busnum = dev->bus->number;
+       tbl = dev->sysdata;
+       bbar = tbl->bbar;
+
+       /* enable TCE in PHB Config Register */
+       target = calgary_reg(bbar, phb_offset(busnum) | PHB_CONFIG_RW_OFFSET);
+       val32 = be32_to_cpu(readl(target));
+       val32 |= PHB_TCE_ENABLE | PHB_DAC_DISABLE | PHB_MCSR_ENABLE;
+
+       printk(KERN_INFO "Calgary: enabling translation on PHB %d\n", busnum);
+       printk(KERN_INFO "Calgary: errant DMAs will now be prevented on this "
+              "bus.\n");
+
+       writel(cpu_to_be32(val32), target);
+       readl(target); /* flush */
+
+       init_timer(&tbl->watchdog_timer);
+       tbl->watchdog_timer.function = &calgary_watchdog;
+       tbl->watchdog_timer.data = (unsigned long)dev;
+       mod_timer(&tbl->watchdog_timer, jiffies);
+}
+
+static void __init calgary_disable_translation(struct pci_dev *dev)
+{
+       u32 val32;
+       unsigned char busnum;
+       void __iomem *target;
+       void __iomem *bbar;
+       struct iommu_table *tbl;
+
+       busnum = dev->bus->number;
+       tbl = dev->sysdata;
+       bbar = tbl->bbar;
+
+       /* disable TCE in PHB Config Register */
+       target = calgary_reg(bbar, phb_offset(busnum) | PHB_CONFIG_RW_OFFSET);
+       val32 = be32_to_cpu(readl(target));
+       val32 &= ~(PHB_TCE_ENABLE | PHB_DAC_DISABLE | PHB_MCSR_ENABLE);
+
+       printk(KERN_INFO "Calgary: disabling translation on PHB %d!\n", busnum);
+       writel(cpu_to_be32(val32), target);
+       readl(target); /* flush */
+
+       del_timer_sync(&tbl->watchdog_timer);
+}
+
+static inline unsigned int __init locate_register_space(struct pci_dev *dev)
+{
+       int rionodeid;
+       u32 address;
+
+       rionodeid = (dev->bus->number % 15 > 4) ? 3 : 2;
+       /*
+        * register space address calculation as follows:
+        * FE0MB-8MB*OneBasedChassisNumber+1MB*(RioNodeId-ChassisBase)
+        * ChassisBase is always zero for x366/x260/x460
+        * RioNodeId is 2 for first Calgary, 3 for second Calgary
+        */
+       address = START_ADDRESS -
+               (0x800000 * (ONE_BASED_CHASSIS_NUM + dev->bus->number / 15)) +
+               (0x100000) * (rionodeid - CHASSIS_BASE);
+       return address;
+}
+
+static int __init calgary_init_one_nontranslated(struct pci_dev *dev)
+{
+       dev->sysdata = NULL;
+       dev->bus->self = dev;
+
+       return 0;
+}
+
+static int __init calgary_init_one(struct pci_dev *dev)
+{
+       u32 address;
+       void __iomem *bbar;
+       int ret;
+
+       address = locate_register_space(dev);
+       /* map entire 1MB of Calgary config space */
+       bbar = ioremap_nocache(address, 1024 * 1024);
+       if (!bbar) {
+               ret = -ENODATA;
+               goto done;
+       }
+
+       ret = calgary_setup_tar(dev, bbar);
+       if (ret)
+               goto iounmap;
+
+       dev->bus->self = dev;
+       calgary_enable_translation(dev);
+
+       return 0;
+
+iounmap:
+       iounmap(bbar);
+done:
+       return ret;
+}
+
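+/*
+ * Walk every Calgary in the system, enabling translation on each PHB
+ * that wants it; on failure, tear down the PHBs already initialized.
+ */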
+static int __init calgary_init(void)
+{
+       int i, ret = -ENODEV;
+       struct pci_dev *dev = NULL;
+
+       for (i = 0; i <= num_online_nodes() * MAX_NUM_OF_PHBS; i++) {
+               dev = pci_get_device(PCI_VENDOR_ID_IBM,
+                                    PCI_DEVICE_ID_IBM_CALGARY,
+                                    dev);
+               if (!dev)
+                       break;
+               if (!translate_phb(dev)) {
+                       calgary_init_one_nontranslated(dev);
+                       continue;
+               }
+               if (!tce_table_kva[i] && !translate_empty_slots) {
+                       pci_dev_put(dev);
+                       continue;
+               }
+               ret = calgary_init_one(dev);
+               if (ret)
+                       goto error;
+       }
+
+       return ret;
+
+error:
+       for (i--; i >= 0; i--) {
+               dev = pci_find_device_reverse(PCI_VENDOR_ID_IBM,
+                                             PCI_DEVICE_ID_IBM_CALGARY,
+                                             dev);
+               if (!translate_phb(dev)) {
+                       pci_dev_put(dev);
+                       continue;
+               }
+               if (!tce_table_kva[i] && !translate_empty_slots)
+                       continue;
+               calgary_disable_translation(dev);
+               calgary_free_tar(dev);
+               pci_dev_put(dev);
+       }
+
+       return ret;
+}
+
+static inline int __init determine_tce_table_size(u64 ram)
+{
+       int ret;
+
+       if (specified_table_size != TCE_TABLE_SIZE_UNSPECIFIED)
+               return specified_table_size;
+
+       /*
+        * Table sizes are from 0 to 7 (TCE_TABLE_SIZE_64K to
+        * TCE_TABLE_SIZE_8M). Table size 0 has 8K entries and each
+        * larger table size has twice as many entries, so shift the
+        * max ram address by 13 to divide by 8K and then look at the
+        * order of the result to choose between 0-7.
+        */
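+       /*
+        * Worked example (illustrative): with 4GB of RAM, ram >> 13 is
+        * 512KB and get_order(512KB) == 7 == TCE_TABLE_SIZE_8M; an 8MB
+        * table of 8-byte TCEs maps 1M pages, i.e. the full 4GB.
+        */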
+       ret = get_order(ram >> 13);
+       if (ret > TCE_TABLE_SIZE_8M)
+               ret = TCE_TABLE_SIZE_8M;
+
+       return ret;
+}
+
+void __init detect_calgary(void)
+{
+       u32 val;
+       int bus, table_idx;
+       void *tbl;
+       int detected = 0;
+
+       /*
+        * if the user specified iommu=off or iommu=soft or we found
+        * another HW IOMMU already, bail out.
+        */
+       if (swiotlb || no_iommu || iommu_detected)
+               return;
+
+       specified_table_size = determine_tce_table_size(end_pfn * PAGE_SIZE);
+
+       for (bus = 0, table_idx = 0;
+            bus <= num_online_nodes() * MAX_PHB_BUS_NUM;
+            bus++) {
+               BUG_ON(bus > MAX_NUMNODES * MAX_PHB_BUS_NUM);
+               if (read_pci_config(bus, 0, 0, 0) != PCI_VENDOR_DEVICE_ID_CALGARY)
+                       continue;
+               if (test_bit(bus, translation_disabled)) {
+                       printk(KERN_INFO "Calgary: translation is disabled for "
+                              "PHB 0x%x\n", bus);
+                       /* skip this phb, don't allocate a tbl for it */
+                       tce_table_kva[table_idx] = NULL;
+                       table_idx++;
+                       continue;
+               }
+               /*
+                * scan the first slot of the PCI bus to see if there
+                * are any devices present
+                */
+               val = read_pci_config(bus, 1, 0, 0);
+               if (val != 0xffffffff || translate_empty_slots) {
+                       tbl = alloc_tce_table();
+                       if (!tbl)
+                               goto cleanup;
+                       detected = 1;
+               } else
+                       tbl = NULL;
+
+               tce_table_kva[table_idx] = tbl;
+               table_idx++;
+       }
+
+       if (detected) {
+               iommu_detected = 1;
+               calgary_detected = 1;
+               printk(KERN_INFO "PCI-DMA: Calgary IOMMU detected. "
+                      "TCE table spec is %d.\n", specified_table_size);
+       }
+       return;
+
+cleanup:
+       for (--table_idx; table_idx >= 0; --table_idx)
+               if (tce_table_kva[table_idx])
+                       free_tce_table(tce_table_kva[table_idx]);
+}
+
+int __init calgary_iommu_init(void)
+{
+       int ret;
+
+       if (no_iommu || swiotlb)
+               return -ENODEV;
+
+       if (!calgary_detected)
+               return -ENODEV;
+
+       /* ok, we're trying to use Calgary - let's roll */
+       printk(KERN_INFO "PCI-DMA: Using Calgary IOMMU\n");
+
+       ret = calgary_init();
+       if (ret) {
+               printk(KERN_ERR "PCI-DMA: Calgary init failed %d, "
+                      "falling back to no_iommu\n", ret);
+               if (end_pfn > MAX_DMA32_PFN)
+                       printk(KERN_ERR "WARNING more than 4GB of memory, "
+                                       "32bit PCI may malfunction.\n");
+               return ret;
+       }
+
+       force_iommu = 1;
+       dma_ops = &calgary_dma_ops;
+
+       return 0;
+}
+
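+/*
+ * Parse the calgary= boot option. Options are comma-separated, e.g.
+ * (illustrative): calgary=1M,translate_empty_slots,disable=0x14
+ * -- a TCE table size, translation of empty slots, and disabling
+ * translation for a given PHB by bus number.
+ */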
+static int __init calgary_parse_options(char *p)
+{
+       unsigned int bridge;
+       size_t len;
+       char *endp;
+
+       while (*p) {
+               if (!strncmp(p, "64k", 3))
+                       specified_table_size = TCE_TABLE_SIZE_64K;
+               else if (!strncmp(p, "128k", 4))
+                       specified_table_size = TCE_TABLE_SIZE_128K;
+               else if (!strncmp(p, "256k", 4))
+                       specified_table_size = TCE_TABLE_SIZE_256K;
+               else if (!strncmp(p, "512k", 4))
+                       specified_table_size = TCE_TABLE_SIZE_512K;
+               else if (!strncmp(p, "1M", 2))
+                       specified_table_size = TCE_TABLE_SIZE_1M;
+               else if (!strncmp(p, "2M", 2))
+                       specified_table_size = TCE_TABLE_SIZE_2M;
+               else if (!strncmp(p, "4M", 2))
+                       specified_table_size = TCE_TABLE_SIZE_4M;
+               else if (!strncmp(p, "8M", 2))
+                       specified_table_size = TCE_TABLE_SIZE_8M;
+
+               len = strlen("translate_empty_slots");
+               if (!strncmp(p, "translate_empty_slots", len))
+                       translate_empty_slots = 1;
+
+               len = strlen("disable");
+               if (!strncmp(p, "disable", len)) {
+                       p += len;
+                       if (*p == '=')
+                               ++p;
+                       if (*p == '\0')
+                               break;
+                       bridge = simple_strtol(p, &endp, 0);
+                       if (p == endp)
+                               break;
+
+                       if (bridge <= (num_online_nodes() * MAX_PHB_BUS_NUM)) {
+                               printk(KERN_INFO "Calgary: disabling "
+                                      "translation for PHB 0x%x\n", bridge);
+                               set_bit(bridge, translation_disabled);
+                       }
+               }
+
+               p = strpbrk(p, ",");
+               if (!p)
+                       break;
+
+               p++; /* skip ',' */
+       }
+       return 1;
+}
+__setup("calgary=", calgary_parse_options);
index a9275c9557cf432ea3187f806c15fb369d72b312..9c44f4f2433d7b4d77ed1ee9e2192f14b0149228 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/module.h>
 #include <asm/io.h>
 #include <asm/proto.h>
+#include <asm/calgary.h>
 
 int iommu_merge __read_mostly = 0;
 EXPORT_SYMBOL(iommu_merge);
@@ -33,12 +34,15 @@ int panic_on_overflow __read_mostly = 0;
 int force_iommu __read_mostly = 0;
 #endif
 
+/* Set this to 1 if there is a HW IOMMU in the system */
+int iommu_detected __read_mostly = 0;
+
 /* Dummy device used for NULL arguments (normally ISA). A smaller DMA
    mask would probably be better, but this is bug-to-bug compatible
    with i386. */
 struct device fallback_dev = {
        .bus_id = "fallback device",
-       .coherent_dma_mask = 0xffffffff,
+       .coherent_dma_mask = DMA_32BIT_MASK,
        .dma_mask = &fallback_dev.coherent_dma_mask,
 };
 
@@ -77,7 +81,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
                dev = &fallback_dev;
        dma_mask = dev->coherent_dma_mask;
        if (dma_mask == 0)
-               dma_mask = 0xffffffff;
+               dma_mask = DMA_32BIT_MASK;
 
        /* Don't invoke OOM killer */
        gfp |= __GFP_NORETRY;
@@ -90,7 +94,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
           larger than 16MB and in this case we have a chance of
           finding fitting memory in the next higher zone first. If
           not retry with true GFP_DMA. -AK */
-       if (dma_mask <= 0xffffffff)
+       if (dma_mask <= DMA_32BIT_MASK)
                gfp |= GFP_DMA32;
 
  again:
@@ -111,7 +115,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
 
                        /* Don't use the 16MB ZONE_DMA unless absolutely
                           needed. It's better to use remapping first. */
-                       if (dma_mask < 0xffffffff && !(gfp & GFP_DMA)) {
+                       if (dma_mask < DMA_32BIT_MASK && !(gfp & GFP_DMA)) {
                                gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
                                goto again;
                        }
@@ -174,7 +178,7 @@ int dma_supported(struct device *dev, u64 mask)
        /* Copied from i386. Doesn't make much sense, because it will
           only work for pci_alloc_coherent.
           The caller just has to use GFP_DMA in this case. */
-        if (mask < 0x00ffffff)
+        if (mask < DMA_24BIT_MASK)
                 return 0;
 
        /* Tell the device to use SAC when IOMMU force is on.  This
@@ -189,7 +193,7 @@ int dma_supported(struct device *dev, u64 mask)
           SAC for these.  Assume all masks <= 40 bits are of this
           type. Normally this doesn't make any difference, but gives
           more gentle handling of IOMMU overflow. */
-       if (iommu_sac_force && (mask >= 0xffffffffffULL)) {
+       if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) {
                printk(KERN_INFO "%s: Force SAC with mask %Lx\n", dev->bus_id,mask);
                return 0;
        }
@@ -266,7 +270,7 @@ __init int iommu_setup(char *p)
                    swiotlb = 1;
 #endif
 
-#ifdef CONFIG_GART_IOMMU
+#ifdef CONFIG_IOMMU
            gart_parse_options(p);
 #endif
 
@@ -276,3 +280,40 @@ __init int iommu_setup(char *p)
     }
     return 1;
 }
+__setup("iommu=", iommu_setup);
+
+void __init pci_iommu_alloc(void)
+{
+       /*
+        * The order of these calls matters for fall-back/fail-over:
+        * a hardware IOMMU must be detected before swiotlb decides
+        * whether it needs to step in as the fallback.
+        */
+#ifdef CONFIG_IOMMU
+       iommu_hole_init();
+#endif
+
+#ifdef CONFIG_CALGARY_IOMMU
+       detect_calgary();
+#endif
+
+#ifdef CONFIG_SWIOTLB
+       pci_swiotlb_init();
+#endif
+}
+
+static int __init pci_iommu_init(void)
+{
+#ifdef CONFIG_CALGARY_IOMMU
+       calgary_iommu_init();
+#endif
+
+#ifdef CONFIG_IOMMU
+       gart_iommu_init();
+#endif
+
+       no_iommu_init();
+       return 0;
+}
+
+/* Must execute after PCI subsystem */
+fs_initcall(pci_iommu_init);
index 82a7c9bfdfa0f3e0bd1945225fd0f86df67b1c96..4ca674d16b09c62b10afc56674e06126cdbce481 100644 (file)
@@ -32,6 +32,7 @@
 #include <asm/kdebug.h>
 #include <asm/swiotlb.h>
 #include <asm/dma.h>
+#include <asm/k8.h>
 
 unsigned long iommu_bus_base;  /* GART remapping area (physical) */
 static unsigned long iommu_size;       /* size of remapping area bytes */
@@ -46,8 +47,6 @@ u32 *iommu_gatt_base;                 /* Remapping table */
    also seen with Qlogic at least). */
 int iommu_fullflush = 1;
 
-#define MAX_NB 8
-
 /* Allocation bitmap for the remapping area */ 
 static DEFINE_SPINLOCK(iommu_bitmap_lock);
 static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */
@@ -63,13 +62,6 @@ static u32 gart_unmapped_entry;
 #define to_pages(addr,size) \
        (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
 
-#define for_all_nb(dev) \
-       dev = NULL;     \
-       while ((dev = pci_get_device(PCI_VENDOR_ID_AMD, 0x1103, dev))!=NULL)
-
-static struct pci_dev *northbridges[MAX_NB];
-static u32 northbridge_flush_word[MAX_NB];
-
 #define EMERGENCY_PAGES 32 /* = 128KB */ 
 
 #ifdef CONFIG_AGP
@@ -93,7 +85,7 @@ static unsigned long alloc_iommu(int size)
        offset = find_next_zero_string(iommu_gart_bitmap,next_bit,iommu_pages,size);
        if (offset == -1) {
                need_flush = 1;
-               offset = find_next_zero_string(iommu_gart_bitmap,0,next_bit,size);
+               offset = find_next_zero_string(iommu_gart_bitmap,0,iommu_pages,size);
        }
        if (offset != -1) { 
                set_bit_string(iommu_gart_bitmap, offset, size); 
@@ -120,44 +112,17 @@ static void free_iommu(unsigned long offset, int size)
 /* 
  * Use global flush state to avoid races with multiple flushers.
  */
-static void flush_gart(struct device *dev)
+static void flush_gart(void)
 { 
        unsigned long flags;
-       int flushed = 0;
-       int i, max;
-
        spin_lock_irqsave(&iommu_bitmap_lock, flags);
-       if (need_flush) { 
-               max = 0;
-               for (i = 0; i < MAX_NB; i++) {
-                       if (!northbridges[i]) 
-                               continue;
-                       pci_write_config_dword(northbridges[i], 0x9c, 
-                                              northbridge_flush_word[i] | 1); 
-                       flushed++;
-                       max = i;
-               }
-               for (i = 0; i <= max; i++) {
-                       u32 w;
-                       if (!northbridges[i])
-                               continue;
-                       /* Make sure the hardware actually executed the flush. */
-                       for (;;) { 
-                               pci_read_config_dword(northbridges[i], 0x9c, &w);
-                               if (!(w & 1))
-                                       break;
-                               cpu_relax();
-                       }
-               } 
-               if (!flushed) 
-                       printk("nothing to flush?\n");
+       if (need_flush) {
+               k8_flush_garts();
                need_flush = 0;
        } 
        spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
 } 
 
-
-
 #ifdef CONFIG_IOMMU_LEAK
 
 #define SET_LEAK(x) if (iommu_leak_tab) \
@@ -266,7 +231,7 @@ static dma_addr_t gart_map_simple(struct device *dev, char *buf,
                                 size_t size, int dir)
 {
        dma_addr_t map = dma_map_area(dev, virt_to_bus(buf), size, dir);
-       flush_gart(dev);
+       flush_gart();
        return map;
 }
 
@@ -288,6 +253,28 @@ dma_addr_t gart_map_single(struct device *dev, void *addr, size_t size, int dir)
        return bus; 
 }
 
+/*
+ * Free a DMA mapping.
+ */
+void gart_unmap_single(struct device *dev, dma_addr_t dma_addr,
+                     size_t size, int direction)
+{
+       unsigned long iommu_page;
+       int npages;
+       int i;
+
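+       /* addresses outside the aperture (or in its emergency pool) were never remapped */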
+       if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE ||
+           dma_addr >= iommu_bus_base + iommu_size)
+               return;
+       iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT;
+       npages = to_pages(dma_addr, size);
+       for (i = 0; i < npages; i++) {
+               iommu_gatt_base[iommu_page + i] = gart_unmapped_entry;
+               CLEAR_LEAK(iommu_page + i);
+       }
+       free_iommu(iommu_page, npages);
+}
+
 /*
  * Wrapper for pci_unmap_single working with scatterlists.
  */
@@ -299,7 +286,7 @@ void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int di
                struct scatterlist *s = &sg[i];
                if (!s->dma_length || !s->length)
                        break;
-               dma_unmap_single(dev, s->dma_address, s->dma_length, dir);
+               gart_unmap_single(dev, s->dma_address, s->dma_length, dir);
        }
 }
 
@@ -329,7 +316,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
                s->dma_address = addr;
                s->dma_length = s->length;
        }
-       flush_gart(dev);
+       flush_gart();
        return nents;
 }
 
@@ -436,13 +423,13 @@ int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
        if (dma_map_cont(sg, start, i, sg+out, pages, need) < 0)
                goto error;
        out++;
-       flush_gart(dev);
+       flush_gart();
        if (out < nents) 
                sg[out].dma_length = 0; 
        return out;
 
 error:
-       flush_gart(NULL);
+       flush_gart();
        gart_unmap_sg(dev, sg, nents, dir);
        /* When it was forced or merged try again in a dumb way */
        if (force_iommu || iommu_merge) {
@@ -458,28 +445,6 @@ error:
        return 0;
 } 
 
-/*
- * Free a DMA mapping.
- */ 
-void gart_unmap_single(struct device *dev, dma_addr_t dma_addr,
-                     size_t size, int direction)
-{
-       unsigned long iommu_page; 
-       int npages;
-       int i;
-
-       if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE || 
-           dma_addr >= iommu_bus_base + iommu_size)
-               return;
-       iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT;   
-       npages = to_pages(dma_addr, size);
-       for (i = 0; i < npages; i++) { 
-               iommu_gatt_base[iommu_page + i] = gart_unmapped_entry; 
-               CLEAR_LEAK(iommu_page + i);
-       }
-       free_iommu(iommu_page, npages);
-}
-
 static int no_agp;
 
 static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
@@ -532,10 +497,13 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
        void *gatt;
        unsigned aper_base, new_aper_base;
        unsigned aper_size, gatt_size, new_aper_size;
-       
+       int i;
+
        printk(KERN_INFO "PCI-DMA: Disabling AGP.\n");
        aper_size = aper_base = info->aper_size = 0;
-       for_all_nb(dev) { 
+       dev = NULL;
+       for (i = 0; i < num_k8_northbridges; i++) {
+               dev = k8_northbridges[i];
                new_aper_base = read_aperture(dev, &new_aper_size); 
                if (!new_aper_base) 
                        goto nommu; 
@@ -558,11 +526,12 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
                panic("Cannot allocate GATT table"); 
        memset(gatt, 0, gatt_size); 
        agp_gatt_table = gatt;
-       
-       for_all_nb(dev) { 
+
+       for (i = 0; i < num_k8_northbridges; i++) {
                u32 ctl; 
                u32 gatt_reg; 
 
+               dev = k8_northbridges[i];
                gatt_reg = __pa(gatt) >> 12; 
                gatt_reg <<= 4; 
                pci_write_config_dword(dev, 0x98, gatt_reg);
@@ -573,7 +542,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
 
                pci_write_config_dword(dev, 0x90, ctl); 
        }
-       flush_gart(NULL); 
+       flush_gart();
        
        printk("PCI-DMA: aperture base @ %x size %u KB\n",aper_base, aper_size>>10); 
        return 0;
@@ -602,15 +571,19 @@ static struct dma_mapping_ops gart_dma_ops = {
        .unmap_sg = gart_unmap_sg,
 };
 
-static int __init pci_iommu_init(void)
+void __init gart_iommu_init(void)
 { 
        struct agp_kern_info info;
        unsigned long aper_size;
        unsigned long iommu_start;
-       struct pci_dev *dev;
        unsigned long scratch;
        long i;
 
+       if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0) {
+               printk(KERN_INFO "PCI-GART: No AMD northbridge found.\n");
+               return;
+       }
+
 #ifndef CONFIG_AGP_AMD64
        no_agp = 1; 
 #else
@@ -622,7 +595,11 @@ static int __init pci_iommu_init(void)
 #endif 
 
        if (swiotlb)
-               return -1; 
+               return;
+
+       /* Did we detect a different HW IOMMU? */
+       if (iommu_detected && !iommu_aperture)
+               return;
 
        if (no_iommu ||
            (!force_iommu && end_pfn <= MAX_DMA32_PFN) ||
@@ -634,15 +611,7 @@ static int __init pci_iommu_init(void)
                                        "but IOMMU not available.\n"
                               KERN_ERR "WARNING 32bit PCI may malfunction.\n");
                }
-               return -1;
-       }
-
-       i = 0;
-       for_all_nb(dev)
-               i++;
-       if (i > MAX_NB) {
-               printk(KERN_ERR "PCI-GART: Too many northbridges (%ld). Disabled\n", i);
-               return -1;
+               return;
        }
 
        printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n");
@@ -707,26 +676,10 @@ static int __init pci_iommu_init(void)
        for (i = EMERGENCY_PAGES; i < iommu_pages; i++) 
                iommu_gatt_base[i] = gart_unmapped_entry;
 
-       for_all_nb(dev) {
-               u32 flag; 
-               int cpu = PCI_SLOT(dev->devfn) - 24;
-               if (cpu >= MAX_NB)
-                       continue;
-               northbridges[cpu] = dev;
-               pci_read_config_dword(dev, 0x9c, &flag); /* cache flush word */
-               northbridge_flush_word[cpu] = flag; 
-       }
-                    
-       flush_gart(NULL);
-
+       flush_gart();
        dma_ops = &gart_dma_ops;
-
-       return 0;
 } 
 
-/* Must execute after PCI subsystem */
-fs_initcall(pci_iommu_init);
-
 void gart_parse_options(char *p)
 {
        int arg;
index 1f6ecc62061d9121633e53edf672dec81b2da7c3..c4c3cc36ac5b033997d5904fa5adff8fa4de54b8 100644 (file)
@@ -4,6 +4,8 @@
 #include <linux/init.h>
 #include <linux/pci.h>
 #include <linux/string.h>
+#include <linux/dma-mapping.h>
+
 #include <asm/proto.h>
 #include <asm/processor.h>
 #include <asm/dma.h>
@@ -12,10 +14,11 @@ static int
 check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size)
 {
         if (hwdev && bus + size > *hwdev->dma_mask) {
-               if (*hwdev->dma_mask >= 0xffffffffULL)
+               if (*hwdev->dma_mask >= DMA_32BIT_MASK)
                        printk(KERN_ERR
-                           "nommu_%s: overflow %Lx+%lu of device mask %Lx\n",
-                               name, (long long)bus, size, (long long)*hwdev->dma_mask);
+                           "nommu_%s: overflow %Lx+%zu of device mask %Lx\n",
+                               name, (long long)bus, size,
+                               (long long)*hwdev->dma_mask);
                return 0;
        }
        return 1;
index 990ed67896f2ff514cc6283a65fea24fa00adcf1..ebdb77fe20573702facc612fd1262ab3e3d93ade 100644 (file)
@@ -31,7 +31,7 @@ struct dma_mapping_ops swiotlb_dma_ops = {
 void pci_swiotlb_init(void)
 {
        /* don't initialize swiotlb if iommu=off (no_iommu=1) */
-       if (!iommu_aperture && !no_iommu &&
+       if (!iommu_detected && !no_iommu &&
            (end_pfn > MAX_DMA32_PFN || force_iommu))
               swiotlb = 1;
        if (swiotlb) {
index fb903e65e079a26034f76a30ef6f54b40613453d..ca56e19b8b6e25ec64ca2325d07f3040ab0029f3 100644 (file)
@@ -10,7 +10,6 @@
  *     Andi Kleen.
  *
  *     CPU hotplug support - ashok.raj@intel.com
- *  $Id: process.c,v 1.38 2002/01/15 10:08:03 ak Exp $
  */
 
 /*
@@ -64,6 +63,7 @@ EXPORT_SYMBOL(boot_option_idle_override);
  * Powermanagement idle function, if any..
  */
 void (*pm_idle)(void);
+EXPORT_SYMBOL(pm_idle);
 static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
 
 static ATOMIC_NOTIFIER_HEAD(idle_notifier);
@@ -111,7 +111,7 @@ static void default_idle(void)
 {
        local_irq_enable();
 
-       clear_thread_flag(TIF_POLLING_NRFLAG);
+       current_thread_info()->status &= ~TS_POLLING;
        smp_mb__after_clear_bit();
        while (!need_resched()) {
                local_irq_disable();
@@ -120,7 +120,7 @@ static void default_idle(void)
                else
                        local_irq_enable();
        }
-       set_thread_flag(TIF_POLLING_NRFLAG);
+       current_thread_info()->status |= TS_POLLING;
 }
 
 /*
@@ -203,8 +203,7 @@ static inline void play_dead(void)
  */
 void cpu_idle (void)
 {
-       set_thread_flag(TIF_POLLING_NRFLAG);
-
+       current_thread_info()->status |= TS_POLLING;
        /* endless idle loop with no priority at all */
        while (1) {
                while (!need_resched()) {
@@ -335,7 +334,7 @@ void show_regs(struct pt_regs *regs)
 {
        printk("CPU %d:", smp_processor_id());
        __show_regs(regs);
-       show_trace(&regs->rsp);
+       show_trace(NULL, regs, (void *)(regs + 1));
 }
 
 /*
@@ -365,8 +364,11 @@ void flush_thread(void)
        struct task_struct *tsk = current;
        struct thread_info *t = current_thread_info();
 
-       if (t->flags & _TIF_ABI_PENDING)
+       if (t->flags & _TIF_ABI_PENDING) {
                t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32);
+               if (t->flags & _TIF_IA32)
+                       current_thread_info()->status |= TS_COMPAT;
+       }
 
        tsk->thread.debugreg0 = 0;
        tsk->thread.debugreg1 = 0;
index 57117b8beb2bfd14e7fc51a618f56fff36176826..2d67698474564cf70f43e6121fa8a7941cbf23f2 100644 (file)
@@ -20,6 +20,7 @@
  * Power off function, if any
  */
 void (*pm_power_off)(void);
+EXPORT_SYMBOL(pm_power_off);
 
 static long no_idt[3];
 static enum { 
index 143c65031539825b70f14a68370312ebddf5f9ea..1129918ede826d7319cdb45f85344d520abbd7ca 100644 (file)
@@ -5,8 +5,6 @@
  *
  *  Nov 2001 Dave Jones <davej@suse.de>
  *  Forked from i386 setup code.
- *
- *  $Id$
  */
 
 /*
@@ -65,9 +63,7 @@
 #include <asm/setup.h>
 #include <asm/mach_apic.h>
 #include <asm/numa.h>
-#include <asm/swiotlb.h>
 #include <asm/sections.h>
-#include <asm/gart-mapping.h>
 #include <asm/dmi.h>
 
 /*
@@ -75,6 +71,7 @@
  */
 
 struct cpuinfo_x86 boot_cpu_data __read_mostly;
+EXPORT_SYMBOL(boot_cpu_data);
 
 unsigned long mmu_cr4_features;
 
@@ -103,6 +100,7 @@ char dmi_alloc_data[DMI_MAX_DATA];
  * Setup options
  */
 struct screen_info screen_info;
+EXPORT_SYMBOL(screen_info);
 struct sys_desc_table_struct {
        unsigned short length;
        unsigned char table[0];
@@ -474,80 +472,6 @@ contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
 } 
 #endif
 
-/* Use inline assembly to define this because the nops are defined 
-   as inline assembly strings in the include files and we cannot 
-   get them easily into strings. */
-asm("\t.data\nk8nops: " 
-    K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
-    K8_NOP7 K8_NOP8); 
-    
-extern unsigned char k8nops[];
-static unsigned char *k8_nops[ASM_NOP_MAX+1] = { 
-     NULL,
-     k8nops,
-     k8nops + 1,
-     k8nops + 1 + 2,
-     k8nops + 1 + 2 + 3,
-     k8nops + 1 + 2 + 3 + 4,
-     k8nops + 1 + 2 + 3 + 4 + 5,
-     k8nops + 1 + 2 + 3 + 4 + 5 + 6,
-     k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
-}; 
-
-extern char __vsyscall_0;
-
-/* Replace instructions with better alternatives for this CPU type.
-
-   This runs before SMP is initialized to avoid SMP problems with
-   self modifying code. This implies that assymetric systems where
-   APs have less capabilities than the boot processor are not handled. 
-   In this case boot with "noreplacement". */ 
-void apply_alternatives(void *start, void *end) 
-{ 
-       struct alt_instr *a; 
-       int diff, i, k;
-       for (a = start; (void *)a < end; a++) { 
-               u8 *instr;
-
-               if (!boot_cpu_has(a->cpuid))
-                       continue;
-
-               BUG_ON(a->replacementlen > a->instrlen); 
-               instr = a->instr;
-               /* vsyscall code is not mapped yet. resolve it manually. */
-               if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END)
-                       instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0));
-               __inline_memcpy(instr, a->replacement, a->replacementlen);
-               diff = a->instrlen - a->replacementlen; 
-
-               /* Pad the rest with nops */
-               for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
-                       k = diff;
-                       if (k > ASM_NOP_MAX)
-                               k = ASM_NOP_MAX;
-                       __inline_memcpy(instr + i, k8_nops[k], k);
-               } 
-       }
-} 
-
-static int no_replacement __initdata = 0; 
-void __init alternative_instructions(void)
-{
-       extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
-       if (no_replacement) 
-               return;
-       apply_alternatives(__alt_instructions, __alt_instructions_end);
-}
-
-static int __init noreplacement_setup(char *s)
-{ 
-     no_replacement = 1; 
-     return 1;
-} 
-
-__setup("noreplacement", noreplacement_setup); 
-
 #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
 struct edd edd;
 #ifdef CONFIG_EDD_MODULE
@@ -780,10 +704,6 @@ void __init setup_arch(char **cmdline_p)
 
        e820_setup_gap();
 
-#ifdef CONFIG_GART_IOMMU
-       iommu_hole_init();
-#endif
-
 #ifdef CONFIG_VT
 #if defined(CONFIG_VGA_CONSOLE)
        conswitchp = &vga_con;
@@ -868,24 +788,32 @@ static int nearby_node(int apicid)
 static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_SMP
-       int cpu = smp_processor_id();
        unsigned bits;
 #ifdef CONFIG_NUMA
+       int cpu = smp_processor_id();
        int node = 0;
        unsigned apicid = hard_smp_processor_id();
 #endif
+       unsigned ecx = cpuid_ecx(0x80000008);
+
+       c->x86_max_cores = (ecx & 0xff) + 1;
 
-       bits = 0;
-       while ((1 << bits) < c->x86_max_cores)
-               bits++;
+       /* CPU telling us the core id bits shift? */
+       bits = (ecx >> 12) & 0xF;
+
+       /* Otherwise recompute */
+       if (bits == 0) {
+               while ((1 << bits) < c->x86_max_cores)
+                       bits++;
+       }
 
        /* Low order bits define the core id (index of core in socket) */
-       cpu_core_id[cpu] = phys_proc_id[cpu] & ((1 << bits)-1);
+       c->cpu_core_id = c->phys_proc_id & ((1 << bits)-1);
        /* Convert the APIC ID into the socket ID */
-       phys_proc_id[cpu] = phys_pkg_id(bits);
+       c->phys_proc_id = phys_pkg_id(bits);
 
 #ifdef CONFIG_NUMA
-       node = phys_proc_id[cpu];
+       node = c->phys_proc_id;
        if (apicid_to_node[apicid] != NUMA_NO_NODE)
                node = apicid_to_node[apicid];
        if (!node_online(node)) {
@@ -898,7 +826,7 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
                   but in the same order as the HT nodeids.
                   If that doesn't result in a usable node fall back to the
                   path for the previous case.  */
-               int ht_nodeid = apicid - (phys_proc_id[0] << bits);
+               int ht_nodeid = apicid - (cpu_data[0].phys_proc_id << bits);
                if (ht_nodeid >= 0 &&
                    apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
                        node = apicid_to_node[ht_nodeid];
@@ -908,15 +836,13 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
        }
        numa_set_node(cpu, node);
 
-       printk(KERN_INFO "CPU %d/%x(%d) -> Node %d -> Core %d\n",
-                       cpu, apicid, c->x86_max_cores, node, cpu_core_id[cpu]);
+       printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
 #endif
 #endif
 }
 
-static int __init init_amd(struct cpuinfo_x86 *c)
+static void __init init_amd(struct cpuinfo_x86 *c)
 {
-       int r;
        unsigned level;
 
 #ifdef CONFIG_SMP
@@ -949,8 +875,8 @@ static int __init init_amd(struct cpuinfo_x86 *c)
        if (c->x86 >= 6)
                set_bit(X86_FEATURE_FXSAVE_LEAK, &c->x86_capability);
 
-       r = get_model_name(c);
-       if (!r) { 
+       level = get_model_name(c);
+       if (!level) {
                switch (c->x86) { 
                case 15:
                        /* Should distinguish Models here, but this is only
@@ -965,13 +891,12 @@ static int __init init_amd(struct cpuinfo_x86 *c)
        if (c->x86_power & (1<<8))
                set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
 
-       if (c->extended_cpuid_level >= 0x80000008) {
-               c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
-
+       /* Multi core CPU? */
+       if (c->extended_cpuid_level >= 0x80000008)
                amd_detect_cmp(c);
-       }
 
-       return r;
+       /* Fix cpuid4 emulation for more */
+       num_cache_leaves = 3;
 }
 
 static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
@@ -979,13 +904,14 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
 #ifdef CONFIG_SMP
        u32     eax, ebx, ecx, edx;
        int     index_msb, core_bits;
-       int     cpu = smp_processor_id();
 
        cpuid(1, &eax, &ebx, &ecx, &edx);
 
 
-       if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
+       if (!cpu_has(c, X86_FEATURE_HT))
                return;
+       if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
+               goto out;
 
        smp_num_siblings = (ebx & 0xff0000) >> 16;
 
@@ -1000,10 +926,7 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
                }
 
                index_msb = get_count_order(smp_num_siblings);
-               phys_proc_id[cpu] = phys_pkg_id(index_msb);
-
-               printk(KERN_INFO  "CPU: Physical Processor ID: %d\n",
-                      phys_proc_id[cpu]);
+               c->phys_proc_id = phys_pkg_id(index_msb);
 
                smp_num_siblings = smp_num_siblings / c->x86_max_cores;
 
@@ -1011,13 +934,15 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
 
                core_bits = get_count_order(c->x86_max_cores);
 
-               cpu_core_id[cpu] = phys_pkg_id(index_msb) &
+               c->cpu_core_id = phys_pkg_id(index_msb) &
                                               ((1 << core_bits) - 1);
-
-               if (c->x86_max_cores > 1)
-                       printk(KERN_INFO  "CPU: Processor Core ID: %d\n",
-                              cpu_core_id[cpu]);
        }
+out:
+       if ((c->x86_max_cores * smp_num_siblings) > 1) {
+               printk(KERN_INFO  "CPU: Physical Processor ID: %d\n", c->phys_proc_id);
+               printk(KERN_INFO  "CPU: Processor Core ID: %d\n", c->cpu_core_id);
+       }
+
 #endif
 }
 
@@ -1026,15 +951,12 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
  */
 static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c)
 {
-       unsigned int eax;
+       unsigned int eax, t;
 
        if (c->cpuid_level < 4)
                return 1;
 
-       __asm__("cpuid"
-               : "=a" (eax)
-               : "0" (4), "c" (0)
-               : "bx", "dx");
+       cpuid_count(4, 0, &eax, &t, &t, &t);
 
        if (eax & 0x1f)
                return ((eax >> 26) + 1);
@@ -1047,16 +969,17 @@ static void srat_detect_node(void)
 #ifdef CONFIG_NUMA
        unsigned node;
        int cpu = smp_processor_id();
+       int apicid = hard_smp_processor_id();
 
        /* Don't do the funky fallback heuristics the AMD version employs
           for now. */
-       node = apicid_to_node[hard_smp_processor_id()];
+       node = apicid_to_node[apicid];
        if (node == NUMA_NO_NODE)
                node = first_node(node_online_map);
        numa_set_node(cpu, node);
 
        if (acpi_numa > 0)
-               printk(KERN_INFO "CPU %d -> Node %d\n", cpu, node);
+               printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
 #endif
 }
 
@@ -1066,6 +989,13 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
        unsigned n;
 
        init_intel_cacheinfo(c);
+       if (c->cpuid_level > 9 ) {
+               unsigned eax = cpuid_eax(10);
+               /* Check for version and the number of counters */
+               if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
+                       set_bit(X86_FEATURE_ARCH_PERFMON, &c->x86_capability);
+       }
+
        n = c->extended_cpuid_level;
        if (n >= 0x80000008) {
                unsigned eax = cpuid_eax(0x80000008);
@@ -1157,7 +1087,7 @@ void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
        }
 
 #ifdef CONFIG_SMP
-       phys_proc_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff;
+       c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff;
 #endif
 }
 
@@ -1284,7 +1214,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
                NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
                NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
                NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL,
-               NULL, "fxsr_opt", "rdtscp", NULL, NULL, "lm", "3dnowext", "3dnow",
+               NULL, "fxsr_opt", NULL, "rdtscp", NULL, "lm", "3dnowext", "3dnow",
 
                /* Transmeta-defined */
                "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
@@ -1295,7 +1225,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
                /* Other (Linux-defined) */
                "cxmmx", NULL, "cyrix_arr", "centaur_mcr", NULL,
                "constant_tsc", NULL, NULL,
-               NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+               "up", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
                NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
                NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 
@@ -1365,9 +1295,9 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 #ifdef CONFIG_SMP
        if (smp_num_siblings * c->x86_max_cores > 1) {
                int cpu = c - cpu_data;
-               seq_printf(m, "physical id\t: %d\n", phys_proc_id[cpu]);
+               seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
                seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[cpu]));
-               seq_printf(m, "core id\t\t: %d\n", cpu_core_id[cpu]);
+               seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id);
                seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
        }
 #endif 
@@ -1441,7 +1371,7 @@ struct seq_operations cpuinfo_op = {
        .show = show_cpuinfo,
 };
 
-#ifdef CONFIG_INPUT_PCSPKR
+#if defined(CONFIG_INPUT_PCSPKR) || defined(CONFIG_INPUT_PCSPKR_MODULE)
 #include <linux/platform_device.h>
 static __init int add_pcspkr(void)
 {
index 8a691fa6d3938e0d6473e208d477995b6838055c..f5934cb4a2b60457e71f3ba1a109dc0b67cc66f0 100644 (file)
@@ -3,7 +3,6 @@
  * Copyright (C) 1995  Linus Torvalds
  * Copyright 2001, 2002, 2003 SuSE Labs / Andi Kleen.
  * See setup.c for older changelog.
- * $Id: setup64.c,v 1.12 2002/03/21 10:09:17 ak Exp $
  */ 
 #include <linux/config.h>
 #include <linux/init.h>
@@ -31,6 +30,7 @@ char x86_boot_params[BOOT_PARAM_SIZE] __initdata = {0,};
 cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
 
 struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly;
+EXPORT_SYMBOL(_cpu_pda);
 struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned;
 
 struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
@@ -38,6 +38,7 @@ struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
 char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned")));
 
 unsigned long __supported_pte_mask __read_mostly = ~0UL;
+EXPORT_SYMBOL(__supported_pte_mask);
 static int do_not_nx __cpuinitdata = 0;
 
 /* noexec=on|off
index e5f5ce7909a32e6e0cc3e56bb8b48c1dc7882d1d..28161170fb0aa36dcb16d91f730e83fee9d4409c 100644 (file)
@@ -7,8 +7,6 @@
  *  1997-11-28  Modified for POSIX.1b signals by Richard Henderson
  *  2000-06-20  Pentium III FXSR, SSE support by Gareth Hughes
  *  2000-2002   x86-64 support by Andi Kleen
- * 
- *  $Id: signal.c,v 1.18 2001/10/17 22:30:37 ak Exp $
  */
 
 #include <linux/sched.h>
@@ -239,7 +237,6 @@ get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size)
        rsp = regs->rsp - 128;
 
        /* This is the X/Open sanctioned signal stack switching.  */
-       /* RED-PEN: redzone on that stack? */
        if (ka->sa.sa_flags & SA_ONSTACK) {
                if (sas_ss_flags(rsp) == 0)
                        rsp = current->sas_ss_sp + current->sas_ss_size;
index 4a6628b14d9923149d2d570b5865c36e07f229f2..8188bae9c6d5cdce444a61993e709d74a9e0918e 100644 (file)
@@ -224,6 +224,7 @@ void flush_tlb_current_task(void)
                flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
        preempt_enable();
 }
+EXPORT_SYMBOL(flush_tlb_current_task);
 
 void flush_tlb_mm (struct mm_struct * mm)
 {
@@ -244,6 +245,7 @@ void flush_tlb_mm (struct mm_struct * mm)
 
        preempt_enable();
 }
+EXPORT_SYMBOL(flush_tlb_mm);
 
 void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
 {
@@ -266,6 +268,7 @@ void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
 
        preempt_enable();
 }
+EXPORT_SYMBOL(flush_tlb_page);
 
 static void do_flush_tlb_all(void* info)
 {
@@ -443,6 +446,7 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
        spin_unlock(&call_lock);
        return 0;
 }
+EXPORT_SYMBOL(smp_call_function);
 
 void smp_stop_cpu(void)
 {
@@ -460,7 +464,7 @@ static void smp_really_stop_cpu(void *dummy)
 {
        smp_stop_cpu(); 
        for (;;) 
-               asm("hlt"); 
+               halt();
 } 
 
 void smp_send_stop(void)
@@ -520,13 +524,13 @@ asmlinkage void smp_call_function_interrupt(void)
 
 int safe_smp_processor_id(void)
 {
-       int apicid, i;
+       unsigned apicid, i;
 
        if (disable_apic)
                return 0;
 
        apicid = hard_smp_processor_id();
-       if (x86_cpu_to_apicid[apicid] == apicid)
+       if (apicid < NR_CPUS && x86_cpu_to_apicid[apicid] == apicid)
                return apicid;
 
        for (i = 0; i < NR_CPUS; ++i) {
index 71a7222cf9ce10e2e6d2629699b2aece2669f289..4e9755179ecf57022bcb8e4e6adaa76d62612010 100644 (file)
 
 /* Number of siblings per CPU package */
 int smp_num_siblings = 1;
-/* Package ID of each logical CPU */
-u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
-/* core ID of each logical CPU */
-u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
+EXPORT_SYMBOL(smp_num_siblings);
 
 /* Last level cache ID of each logical CPU */
 u8 cpu_llc_id[NR_CPUS] __cpuinitdata  = {[0 ... NR_CPUS-1] = BAD_APICID};
+EXPORT_SYMBOL(cpu_llc_id);
 
 /* Bitmask of currently online CPUs */
 cpumask_t cpu_online_map __read_mostly;
@@ -82,18 +80,21 @@ EXPORT_SYMBOL(cpu_online_map);
  */
 cpumask_t cpu_callin_map;
 cpumask_t cpu_callout_map;
+EXPORT_SYMBOL(cpu_callout_map);
 
 cpumask_t cpu_possible_map;
 EXPORT_SYMBOL(cpu_possible_map);
 
 /* Per CPU bogomips and other parameters */
 struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
+EXPORT_SYMBOL(cpu_data);
 
 /* Set when the idlers are all forked */
 int smp_threads_ready;
 
 /* representing HT siblings of each logical CPU */
 cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
+EXPORT_SYMBOL(cpu_sibling_map);
 
 /* representing HT and core siblings of each logical CPU */
 cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
@@ -472,8 +473,8 @@ static inline void set_cpu_sibling_map(int cpu)
 
        if (smp_num_siblings > 1) {
                for_each_cpu_mask(i, cpu_sibling_setup_map) {
-                       if (phys_proc_id[cpu] == phys_proc_id[i] &&
-                           cpu_core_id[cpu] == cpu_core_id[i]) {
+                       if (c[cpu].phys_proc_id == c[i].phys_proc_id &&
+                           c[cpu].cpu_core_id == c[i].cpu_core_id) {
                                cpu_set(i, cpu_sibling_map[cpu]);
                                cpu_set(cpu, cpu_sibling_map[i]);
                                cpu_set(i, cpu_core_map[cpu]);
@@ -500,7 +501,7 @@ static inline void set_cpu_sibling_map(int cpu)
                        cpu_set(i, c[cpu].llc_shared_map);
                        cpu_set(cpu, c[i].llc_shared_map);
                }
-               if (phys_proc_id[cpu] == phys_proc_id[i]) {
+               if (c[cpu].phys_proc_id == c[i].phys_proc_id) {
                        cpu_set(i, cpu_core_map[cpu]);
                        cpu_set(cpu, cpu_core_map[i]);
                        /*
@@ -797,6 +798,8 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid)
        }
 
 
+       alternatives_smp_switch(1);
+
        c_idle.idle = get_idle_for_cpu(cpu);
 
        if (c_idle.idle) {
@@ -1199,8 +1202,8 @@ static void remove_siblinginfo(int cpu)
                cpu_clear(cpu, cpu_sibling_map[sibling]);
        cpus_clear(cpu_sibling_map[cpu]);
        cpus_clear(cpu_core_map[cpu]);
-       phys_proc_id[cpu] = BAD_APICID;
-       cpu_core_id[cpu] = BAD_APICID;
+       c[cpu].phys_proc_id = 0;
+       c[cpu].cpu_core_id = 0;
        cpu_clear(cpu, cpu_sibling_setup_map);
 }
 
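Throughout these hunks, c is cpu_data: the commit folds the standalone phys_proc_id[] and cpu_core_id[] arrays into struct cpuinfo_x86, so all per-CPU topology travels in one structure. Old phys_proc_id[cpu] accesses become cpu_data[cpu].phys_proc_id, e.g. (sketch mirroring the sibling-map logic above):

    /* Sketch: topology tests after the field move into cpuinfo_x86. */
    #include <asm/processor.h>

    static int same_package(int a, int b)
    {
            return cpu_data[a].phys_proc_id == cpu_data[b].phys_proc_id;
    }

    static int same_core(int a, int b)
    {
            return same_package(a, b) &&
                   cpu_data[a].cpu_core_id == cpu_data[b].cpu_core_id;
    }
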
@@ -1259,6 +1262,8 @@ void __cpu_die(unsigned int cpu)
                /* They ack this in play_dead by setting CPU_DEAD */
                if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
                        printk ("CPU %d is now offline\n", cpu);
+                       if (1 == num_online_cpus())
+                               alternatives_smp_switch(0);
                        return;
                }
                msleep(100);
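
The alternatives_smp_switch() call added here pairs with the one in do_boot_cpu() above to keep SMP alternatives in step with CPU hotplug: the patched instruction sequences (notably LOCK prefixes) are switched to their SMP forms before a second CPU comes online, and back to the cheaper UP forms once only one CPU is left. The pairing, reduced to its shape:

    /* Sketch of the hotplug pairing introduced by this commit;
     * alternatives_smp_switch() is declared in the arch headers. */
    #include <linux/cpumask.h>

    extern void alternatives_smp_switch(int smp);

    void before_secondary_boot(void)        /* from do_boot_cpu() */
    {
            alternatives_smp_switch(1);     /* use SMP (locked) forms */
    }

    void after_cpu_offline(void)            /* from __cpu_die() */
    {
            if (num_online_cpus() == 1)
                    alternatives_smp_switch(0);  /* back to UP forms */
    }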
diff --git a/arch/x86_64/kernel/tce.c b/arch/x86_64/kernel/tce.c
new file mode 100644 (file)
index 0000000..8d4c67f
--- /dev/null
+++ b/arch/x86_64/kernel/tce.c
@@ -0,0 +1,202 @@
+/*
+ * Derived from arch/powerpc/platforms/pseries/iommu.c
+ *
+ * Copyright (C) 2006 Jon Mason <jdmason@us.ibm.com>, IBM Corporation
+ * Copyright (C) 2006 Muli Ben-Yehuda <muli@il.ibm.com>, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/bootmem.h>
+#include <asm/tce.h>
+#include <asm/calgary.h>
+#include <asm/proto.h>
+
+/* flush a tce at 'tceaddr' to main memory */
+static inline void flush_tce(void* tceaddr)
+{
+       /* a single tce can't cross a cache line */
+       if (cpu_has_clflush)
+               asm volatile("clflush (%0)" :: "r" (tceaddr));
+       else
+               asm volatile("wbinvd":::"memory");
+}
+
+void tce_build(struct iommu_table *tbl, unsigned long index,
+       unsigned int npages, unsigned long uaddr, int direction)
+{
+       u64* tp;
+       u64 t;
+       u64 rpn;
+
+       t = (1 << TCE_READ_SHIFT);
+       if (direction != DMA_TO_DEVICE)
+               t |= (1 << TCE_WRITE_SHIFT);
+
+       tp = ((u64*)tbl->it_base) + index;
+
+       while (npages--) {
+               rpn = (virt_to_bus((void*)uaddr)) >> PAGE_SHIFT;
+               t &= ~TCE_RPN_MASK;
+               t |= (rpn << TCE_RPN_SHIFT);
+
+               *tp = cpu_to_be64(t);
+               flush_tce(tp);
+
+               uaddr += PAGE_SIZE;
+               tp++;
+       }
+}
+
+void tce_free(struct iommu_table *tbl, long index, unsigned int npages)
+{
+       u64* tp;
+
+       tp  = ((u64*)tbl->it_base) + index;
+
+       while (npages--) {
+               *tp = cpu_to_be64(0);
+               flush_tce(tp);
+               tp++;
+       }
+}
+
+static inline unsigned int table_size_to_number_of_entries(unsigned char size)
+{
+       /*
+        * size is the order of the table, 0-7
+        * smallest table is 8K entries, so shift result by 13 to
+        * multiply by 8K
+        */
+       return (1 << size) << 13;
+}
+
+static int tce_table_setparms(struct pci_dev *dev, struct iommu_table *tbl)
+{
+       unsigned int bitmapsz;
+       unsigned int tce_table_index;
+       unsigned long bmppages;
+       int ret;
+
+       tbl->it_busno = dev->bus->number;
+
+       /* set the tce table size - measured in entries */
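
The listing is cut off above, but two points in the new file are worth making concrete. flush_tce() prefers clflush, which writes back only the cache line holding the 8-byte TCE, and falls back to wbinvd, which flushes the entire cache and is far more expensive. And table_size_to_number_of_entries() is plain arithmetic: order 0 is 8K entries and each order doubles it. A hedged worked example (standalone userspace C, assuming the 4 KiB pages and 8-byte big-endian TCEs used above):

    /* Worked example of the sizing math in table_size_to_number_of_entries(). */
    #include <stdio.h>

    int main(void)
    {
            unsigned char size = 3;                     /* table order, 0-7 */
            unsigned int entries = (1u << size) << 13;  /* 8 * 8192 = 65536 */

            printf("entries:    %u\n", entries);                 /* 65536   */
            printf("table size: %u KiB\n", entries * 8 / 1024);  /* 512 KiB */
            printf("DMA window: %u MiB\n", entries * 4 / 1024);  /* 256 MiB */
            return 0;
    }

So an order-3 table costs 512 KiB of contiguous memory and maps a 256 MiB DMA window.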