Merge branch 'x86/apic' into x86/core
author Ingo Molnar <mingo@elte.hu>
Thu, 14 Aug 2008 13:13:47 +0000 (15:13 +0200)
committer Ingo Molnar <mingo@elte.hu>
Thu, 14 Aug 2008 13:13:47 +0000 (15:13 +0200)
1  2 
arch/x86/kernel/apic_32.c
arch/x86/kernel/apic_64.c
arch/x86/kernel/io_apic_32.c
arch/x86/kernel/io_apic_64.c
include/asm-x86/hw_irq.h

index 84318edad8fbaf92af03638f399b7d974e98efa2,01708f128eeee410283dd61e7528d9a6d9195a15..12b154822bce197ebd4f4e58b6fa1ab97220dadd
@@@ -145,18 -145,13 +145,18 @@@ static int modern_apic(void
        return lapic_get_version() >= 0x14;
  }
  
 -void apic_wait_icr_idle(void)
 +/*
 + * Paravirt kernels also might be using these below ops. So we still
 + * use generic apic_read()/apic_write(), which might be pointing to different
 + * ops in PARAVIRT case.
 + */
 +void xapic_wait_icr_idle(void)
  {
        while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
                cpu_relax();
  }
  
 -u32 safe_apic_wait_icr_idle(void)
 +u32 safe_xapic_wait_icr_idle(void)
  {
        u32 send_status;
        int timeout;
        return send_status;
  }
  
 +void xapic_icr_write(u32 low, u32 id)
 +{
 +      apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
 +      apic_write(APIC_ICR, low);
 +}
 +
 +u64 xapic_icr_read(void)
 +{
 +      u32 icr1, icr2;
 +
 +      icr2 = apic_read(APIC_ICR2);
 +      icr1 = apic_read(APIC_ICR);
 +
 +      return icr1 | ((u64)icr2 << 32);
 +}
 +
 +static struct apic_ops xapic_ops = {
 +      .read = native_apic_mem_read,
 +      .write = native_apic_mem_write,
 +      .icr_read = xapic_icr_read,
 +      .icr_write = xapic_icr_write,
 +      .wait_icr_idle = xapic_wait_icr_idle,
 +      .safe_wait_icr_idle = safe_xapic_wait_icr_idle,
 +};
 +
 +struct apic_ops __read_mostly *apic_ops = &xapic_ops;
 +EXPORT_SYMBOL_GPL(apic_ops);
 +
  /**
   * enable_NMI_through_LVT0 - enable NMI through local vector table 0
   */
  void __cpuinit enable_NMI_through_LVT0(void)
  {
-       unsigned int v = APIC_DM_NMI;
+       unsigned int v;
  
-       /* Level triggered for 82489DX */
+       /* unmask and set to NMI */
+       v = APIC_DM_NMI;
+       /* Level triggered for 82489DX (32bit mode) */
        if (!lapic_is_integrated())
                v |= APIC_LVT_LEVEL_TRIGGER;
        apic_write(APIC_LVT0, v);
  }
  
@@@ -226,9 -197,13 +230,13 @@@ int get_physical_broadcast(void
   */
  int lapic_get_maxlvt(void)
  {
-       unsigned int v = apic_read(APIC_LVR);
+       unsigned int v;
  
-       /* 82489DXs do not report # of LVT entries. */
+       v = apic_read(APIC_LVR);
+       /*
+        * - we always have APIC integrated on 64bit mode
+        * - 82489DXs do not report # of LVT entries
+        */
        return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2;
  }
  
@@@ -1238,7 -1213,7 +1246,7 @@@ void __init init_apic_mappings(void
         * default configuration (or the MP table is broken).
         */
        if (boot_cpu_physical_apicid == -1U)
 -              boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
 +              boot_cpu_physical_apicid = read_apic_id();
  
  }
  
@@@ -1275,7 -1250,7 +1283,7 @@@ int __init APIC_init_uniprocessor(void
         * might be zero if read from MP tables. Get it from LAPIC.
         */
  #ifdef CONFIG_CRASH_DUMP
 -      boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
 +      boot_cpu_physical_apicid = read_apic_id();
  #endif
        physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
  
@@@ -1354,6 -1329,54 +1362,6 @@@ void smp_error_interrupt(struct pt_reg
        irq_exit();
  }
  
 -#ifdef CONFIG_SMP
 -void __init smp_intr_init(void)
 -{
 -      /*
 -       * IRQ0 must be given a fixed assignment and initialized,
 -       * because it's used before the IO-APIC is set up.
 -       */
 -      set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]);
 -
 -      /*
 -       * The reschedule interrupt is a CPU-to-CPU reschedule-helper
 -       * IPI, driven by wakeup.
 -       */
 -      alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
 -
 -      /* IPI for invalidation */
 -      alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
 -
 -      /* IPI for generic function call */
 -      alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
 -
 -      /* IPI for single call function */
 -      set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
 -                              call_function_single_interrupt);
 -}
 -#endif
 -
 -/*
 - * Initialize APIC interrupts
 - */
 -void __init apic_intr_init(void)
 -{
 -#ifdef CONFIG_SMP
 -      smp_intr_init();
 -#endif
 -      /* self generated IPI for local APIC timer */
 -      alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
 -
 -      /* IPI vectors for APIC spurious and error interrupts */
 -      alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
 -      alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
 -
 -      /* thermal monitor LVT interrupt */
 -#ifdef CONFIG_X86_MCE_P4THERMAL
 -      alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
 -#endif
 -}
 -
  /**
   * connect_bsp_APIC - attach the APIC to the interrupt system
   */
@@@ -1705,19 -1728,15 +1713,19 @@@ static int __init parse_lapic_timer_c2_
  }
  early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
  
 -static int __init apic_set_verbosity(char *str)
 +static int __init apic_set_verbosity(char *arg)
  {
 -      if (strcmp("debug", str) == 0)
 +      if (!arg)
 +              return -EINVAL;
 +
 +      if (strcmp(arg, "debug") == 0)
                apic_verbosity = APIC_DEBUG;
 -      else if (strcmp("verbose", str) == 0)
 +      else if (strcmp(arg, "verbose") == 0)
                apic_verbosity = APIC_VERBOSE;
 -      return 1;
 +
 +      return 0;
  }
 -__setup("apic=", apic_set_verbosity);
 +early_param("apic", apic_set_verbosity);
  
  static int __init lapic_insert_resource(void)
  {
index cd63c0bc61802a5415a3f8917af5e02bffd459ed,7615b4b9c3f3df5721e61bd9711915d1eedfb363..69a876be506fc647df977b64881d4257c623e9c1
@@@ -27,7 -27,6 +27,7 @@@
  #include <linux/clockchips.h>
  #include <linux/acpi_pmtmr.h>
  #include <linux/module.h>
 +#include <linux/dmar.h>
  
  #include <asm/atomic.h>
  #include <asm/smp.h>
@@@ -40,7 -39,6 +40,7 @@@
  #include <asm/proto.h>
  #include <asm/timex.h>
  #include <asm/apic.h>
 +#include <asm/i8259.h>
  
  #include <mach_ipi.h>
  #include <mach_apic.h>
  static int disable_apic_timer __cpuinitdata;
  static int apic_calibrate_pmtmr __initdata;
  int disable_apic;
 +int disable_x2apic;
 +int x2apic;
 +
 +/* x2apic enabled before OS handover */
 +int x2apic_preenabled;
  
  /* Local APIC timer works in C2 */
  int local_apic_timer_c2_ok;
@@@ -126,13 -119,13 +126,13 @@@ static int modern_apic(void
        return lapic_get_version() >= 0x14;
  }
  
 -void apic_wait_icr_idle(void)
 +void xapic_wait_icr_idle(void)
  {
        while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
                cpu_relax();
  }
  
 -u32 safe_apic_wait_icr_idle(void)
 +u32 safe_xapic_wait_icr_idle(void)
  {
        u32 send_status;
        int timeout;
        return send_status;
  }
  
 +void xapic_icr_write(u32 low, u32 id)
 +{
 +      apic_write(APIC_ICR2, id << 24);
 +      apic_write(APIC_ICR, low);
 +}
 +
 +u64 xapic_icr_read(void)
 +{
 +      u32 icr1, icr2;
 +
 +      icr2 = apic_read(APIC_ICR2);
 +      icr1 = apic_read(APIC_ICR);
 +
 +      return (icr1 | ((u64)icr2 << 32));
 +}
 +
 +static struct apic_ops xapic_ops = {
 +      .read = native_apic_mem_read,
 +      .write = native_apic_mem_write,
 +      .icr_read = xapic_icr_read,
 +      .icr_write = xapic_icr_write,
 +      .wait_icr_idle = xapic_wait_icr_idle,
 +      .safe_wait_icr_idle = safe_xapic_wait_icr_idle,
 +};
 +
 +struct apic_ops __read_mostly *apic_ops = &xapic_ops;
 +
 +EXPORT_SYMBOL_GPL(apic_ops);
 +
 +static void x2apic_wait_icr_idle(void)
 +{
 +      /* no need to wait for icr idle in x2apic */
 +      return;
 +}
 +
 +static u32 safe_x2apic_wait_icr_idle(void)
 +{
 +      /* no need to wait for icr idle in x2apic */
 +      return 0;
 +}
 +
 +void x2apic_icr_write(u32 low, u32 id)
 +{
 +      wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
 +}
 +
 +u64 x2apic_icr_read(void)
 +{
 +      unsigned long val;
 +
 +      rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val);
 +      return val;
 +}
 +
 +static struct apic_ops x2apic_ops = {
 +      .read = native_apic_msr_read,
 +      .write = native_apic_msr_write,
 +      .icr_read = x2apic_icr_read,
 +      .icr_write = x2apic_icr_write,
 +      .wait_icr_idle = x2apic_wait_icr_idle,
 +      .safe_wait_icr_idle = safe_x2apic_wait_icr_idle,
 +};
 +
  /**
   * enable_NMI_through_LVT0 - enable NMI through local vector table 0
   */
@@@ -220,6 -150,11 +220,11 @@@ void __cpuinit enable_NMI_through_LVT0(
  
        /* unmask and set to NMI */
        v = APIC_DM_NMI;
+       /* Level triggered for 82489DX (32bit mode) */
+       if (!lapic_is_integrated())
+               v |= APIC_LVT_LEVEL_TRIGGER;
        apic_write(APIC_LVT0, v);
  }
  
   */
  int lapic_get_maxlvt(void)
  {
-       unsigned int v, maxlvt;
+       unsigned int v;
  
        v = apic_read(APIC_LVR);
-       maxlvt = GET_APIC_MAXLVT(v);
-       return maxlvt;
+       /*
+        * - we always have APIC integrated on 64bit mode
+        * - 82489DXs do not report # of LVT entries
+        */
+       return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2;
  }
  
  /*
@@@ -700,10 -638,10 +708,10 @@@ int __init verify_local_APIC(void
        /*
         * The ID register is read/write in a real APIC.
         */
 -      reg0 = read_apic_id();
 +      reg0 = apic_read(APIC_ID);
        apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
        apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
 -      reg1 = read_apic_id();
 +      reg1 = apic_read(APIC_ID);
        apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
        apic_write(APIC_ID, reg0);
        if (reg1 != (reg0 ^ APIC_ID_MASK))
@@@ -904,125 -842,6 +912,125 @@@ void __cpuinit end_local_APIC_setup(voi
        apic_pm_activate();
  }
  
 +void check_x2apic(void)
 +{
 +      int msr, msr2;
 +
 +      rdmsr(MSR_IA32_APICBASE, msr, msr2);
 +
 +      if (msr & X2APIC_ENABLE) {
 +              printk("x2apic enabled by BIOS, switching to x2apic ops\n");
 +              x2apic_preenabled = x2apic = 1;
 +              apic_ops = &x2apic_ops;
 +      }
 +}
 +
 +void enable_x2apic(void)
 +{
 +      int msr, msr2;
 +
 +      rdmsr(MSR_IA32_APICBASE, msr, msr2);
 +      if (!(msr & X2APIC_ENABLE)) {
 +              printk("Enabling x2apic\n");
 +              wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
 +      }
 +}
 +
 +void enable_IR_x2apic(void)
 +{
 +#ifdef CONFIG_INTR_REMAP
 +      int ret;
 +      unsigned long flags;
 +
 +      if (!cpu_has_x2apic)
 +              return;
 +
 +      if (!x2apic_preenabled && disable_x2apic) {
 +              printk(KERN_INFO
 +                     "Skipped enabling x2apic and Interrupt-remapping "
 +                     "because of nox2apic\n");
 +              return;
 +      }
 +
 +      if (x2apic_preenabled && disable_x2apic)
 +              panic("Bios already enabled x2apic, can't enforce nox2apic");
 +
 +      if (!x2apic_preenabled && skip_ioapic_setup) {
 +              printk(KERN_INFO
 +                     "Skipped enabling x2apic and Interrupt-remapping "
 +                     "because of skipping io-apic setup\n");
 +              return;
 +      }
 +
 +      ret = dmar_table_init();
 +      if (ret) {
 +              printk(KERN_INFO
 +                     "dmar_table_init() failed with %d:\n", ret);
 +
 +              if (x2apic_preenabled)
 +                      panic("x2apic enabled by bios. But IR enabling failed");
 +              else
 +                      printk(KERN_INFO
 +                             "Not enabling x2apic,Intr-remapping\n");
 +              return;
 +      }
 +
 +      local_irq_save(flags);
 +      mask_8259A();
 +      save_mask_IO_APIC_setup();
 +
 +      ret = enable_intr_remapping(1);
 +
 +      if (ret && x2apic_preenabled) {
 +              local_irq_restore(flags);
 +              panic("x2apic enabled by bios. But IR enabling failed");
 +      }
 +
 +      if (ret)
 +              goto end;
 +
 +      if (!x2apic) {
 +              x2apic = 1;
 +              apic_ops = &x2apic_ops;
 +              enable_x2apic();
 +      }
 +end:
 +      if (ret)
 +              /*
 +               * IR enabling failed
 +               */
 +              restore_IO_APIC_setup();
 +      else
 +              reinit_intr_remapped_IO_APIC(x2apic_preenabled);
 +
 +      unmask_8259A();
 +      local_irq_restore(flags);
 +
 +      if (!ret) {
 +              if (!x2apic_preenabled)
 +                      printk(KERN_INFO
 +                             "Enabled x2apic and interrupt-remapping\n");
 +              else
 +                      printk(KERN_INFO
 +                             "Enabled Interrupt-remapping\n");
 +      } else
 +              printk(KERN_ERR
 +                     "Failed to enable Interrupt-remapping and x2apic\n");
 +#else
 +      if (!cpu_has_x2apic)
 +              return;
 +
 +      if (x2apic_preenabled)
 +              panic("x2apic enabled prior OS handover,"
 +                    " enable CONFIG_INTR_REMAP");
 +
 +      printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping "
 +             " and x2apic\n");
 +#endif
 +
 +      return;
 +}
 +
  /*
   * Detect and enable local APICs on non-SMP boards.
   * Original code written by Keir Fraser.
@@@ -1062,7 -881,7 +1070,7 @@@ void __init early_init_lapic_mapping(vo
         * Fetch the APIC ID of the BSP in case we have a
         * default configuration (or the MP table is broken).
         */
 -      boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
 +      boot_cpu_physical_apicid = read_apic_id();
  }
  
  /**
   */
  void __init init_apic_mappings(void)
  {
 +      if (x2apic) {
 +              boot_cpu_physical_apicid = read_apic_id();
 +              return;
 +      }
 +
        /*
         * If no local APIC can be found then set up a fake all
         * zeroes page to simulate the local APIC and another
         * Fetch the APIC ID of the BSP in case we have a
         * default configuration (or the MP table is broken).
         */
 -      boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
 +      boot_cpu_physical_apicid = read_apic_id();
  }
  
  /*
@@@ -1113,9 -927,6 +1121,9 @@@ int __init APIC_init_uniprocessor(void
                return -1;
        }
  
 +      enable_IR_x2apic();
 +      setup_apic_routing();
 +
        verify_local_APIC();
  
        connect_bsp_APIC();
@@@ -1297,11 -1108,6 +1305,11 @@@ void __cpuinit generic_processor_info(i
        cpu_set(cpu, cpu_present_map);
  }
  
 +int hard_smp_processor_id(void)
 +{
 +      return read_apic_id();
 +}
 +
  /*
   * Power management
   */
@@@ -1338,7 -1144,7 +1346,7 @@@ static int lapic_suspend(struct sys_dev
  
        maxlvt = lapic_get_maxlvt();
  
 -      apic_pm_state.apic_id = read_apic_id();
 +      apic_pm_state.apic_id = apic_read(APIC_ID);
        apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
        apic_pm_state.apic_ldr = apic_read(APIC_LDR);
        apic_pm_state.apic_dfr = apic_read(APIC_DFR);
@@@ -1373,14 -1179,10 +1381,14 @@@ static int lapic_resume(struct sys_devi
        maxlvt = lapic_get_maxlvt();
  
        local_irq_save(flags);
 -      rdmsr(MSR_IA32_APICBASE, l, h);
 -      l &= ~MSR_IA32_APICBASE_BASE;
 -      l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
 -      wrmsr(MSR_IA32_APICBASE, l, h);
 +      if (!x2apic) {
 +              rdmsr(MSR_IA32_APICBASE, l, h);
 +              l &= ~MSR_IA32_APICBASE_BASE;
 +              l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
 +              wrmsr(MSR_IA32_APICBASE, l, h);
 +      } else
 +              enable_x2apic();
 +
        apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
        apic_write(APIC_ID, apic_pm_state.apic_id);
        apic_write(APIC_DFR, apic_pm_state.apic_dfr);
@@@ -1520,15 -1322,6 +1528,15 @@@ __cpuinit int apic_is_clustered_box(voi
        return (clusters > 2);
  }
  
 +static __init int setup_nox2apic(char *str)
 +{
 +      disable_x2apic = 1;
 +      clear_cpu_cap(&boot_cpu_data, X86_FEATURE_X2APIC);
 +      return 0;
 +}
 +early_param("nox2apic", setup_nox2apic);
 +
 +
  /*
   * APIC command line parameters
   */
index a0f4c27c822da91ed4630809b3b726606e4f65e9,d54455ec985039b6cb079a73d718fc8e0cc2e8bf..e710289f673e86503d1ba9d77e1770e3559c1ccd
  #include <asm/nmi.h>
  #include <asm/msidef.h>
  #include <asm/hypertransport.h>
 +#include <asm/setup.h>
  
  #include <mach_apic.h>
  #include <mach_apicdef.h>
  
+ #define __apicdebuginit(type) static type __init
  int (*ioapic_renumber_irq)(int ioapic, int irq);
  atomic_t irq_mis_count;
  
@@@ -58,7 -59,7 +60,7 @@@
  static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
  
  static DEFINE_SPINLOCK(ioapic_lock);
 -static DEFINE_SPINLOCK(vector_lock);
 +DEFINE_SPINLOCK(vector_lock);
  
  int timer_through_8259 __initdata;
  
@@@ -1210,6 -1211,10 +1212,6 @@@ static int assign_irq_vector(int irq
        return vector;
  }
  
 -void setup_vector_irq(int cpu)
 -{
 -}
 -
  static struct irq_chip ioapic_chip;
  
  #define IOAPIC_AUTO   -1
@@@ -1342,7 -1347,8 +1344,8 @@@ static void __init setup_timer_IRQ0_pin
        ioapic_write_entry(apic, pin, entry);
  }
  
- void __init print_IO_APIC(void)
+ __apicdebuginit(void) print_IO_APIC(void)
  {
        int apic, i;
        union IO_APIC_reg_00 reg_00;
        return;
  }
  
- #if 0
- static void print_APIC_bitfield(int base)
+ __apicdebuginit(void) print_APIC_bitfield(int base)
  {
        unsigned int v;
        int i, j;
        }
  }
  
void /*__init*/ print_local_APIC(void *dummy)
__apicdebuginit(void) print_local_APIC(void *dummy)
  {
        unsigned int v, ver, maxlvt;
 +      u64 icr;
  
        if (apic_verbosity == APIC_QUIET)
                return;
                smp_processor_id(), hard_smp_processor_id());
        v = apic_read(APIC_ID);
        printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v,
 -                      GET_APIC_ID(read_apic_id()));
 +                      GET_APIC_ID(v));
        v = apic_read(APIC_LVR);
        printk(KERN_INFO "... APIC VERSION: %08x\n", v);
        ver = GET_APIC_VERSION(v);
                printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
        }
  
 -      v = apic_read(APIC_ICR);
 -      printk(KERN_DEBUG "... APIC ICR: %08x\n", v);
 -      v = apic_read(APIC_ICR2);
 -      printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
 +      icr = apic_icr_read();
 +      printk(KERN_DEBUG "... APIC ICR: %08x\n", icr);
 +      printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32);
  
        v = apic_read(APIC_LVTT);
        printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
        printk("\n");
  }
  
void print_all_local_APICs(void)
__apicdebuginit(void) print_all_local_APICs(void)
  {
        on_each_cpu(print_local_APIC, NULL, 1);
  }
  
void /*__init*/ print_PIC(void)
__apicdebuginit(void) print_PIC(void)
  {
        unsigned int v;
        unsigned long flags;
        printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
  }
  
- #endif  /*  0  */
+ __apicdebuginit(int) print_all_ICs(void)
+ {
+       print_PIC();
+       print_all_local_APICs();
+       print_IO_APIC();
+       return 0;
+ }
+ fs_initcall(print_all_ICs);
  
  static void __init enable_IO_APIC(void)
  {
@@@ -1699,7 -1713,8 +1710,7 @@@ void disable_IO_APIC(void
                entry.dest_mode       = 0; /* Physical */
                entry.delivery_mode   = dest_ExtINT; /* ExtInt */
                entry.vector          = 0;
 -              entry.dest.physical.physical_dest =
 -                                      GET_APIC_ID(read_apic_id());
 +              entry.dest.physical.physical_dest = read_apic_id();
  
                /*
                 * Add it to the IO-APIC irq-routing table:
@@@ -1725,8 -1740,10 +1736,8 @@@ static void __init setup_ioapic_ids_fro
        unsigned char old_id;
        unsigned long flags;
  
 -#ifdef CONFIG_X86_NUMAQ
 -      if (found_numaq)
 +      if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids())
                return;
 -#endif
  
        /*
         * Don't check I/O APIC IDs for xAPIC systems.  They have
@@@ -2327,8 -2344,6 +2338,6 @@@ void __init setup_IO_APIC(void
        setup_IO_APIC_irqs();
        init_IO_APIC_traps();
        check_timer();
-       if (!acpi_ioapic)
-               print_IO_APIC();
  }
  
  /*
index e63282e788640402828dc17cf641a9a8c71c003a,8cdcc4f287cc399c1ffa3c8182cbbd4e38c4b45f..a1bec2969c6af666d9566ba9a3ba5bcef0a34ee0
@@@ -37,7 -37,6 +37,7 @@@
  #include <acpi/acpi_bus.h>
  #endif
  #include <linux/bootmem.h>
 +#include <linux/dmar.h>
  
  #include <asm/idle.h>
  #include <asm/io.h>
  #include <asm/nmi.h>
  #include <asm/msidef.h>
  #include <asm/hypertransport.h>
 +#include <asm/irq_remapping.h>
  
  #include <mach_ipi.h>
  #include <mach_apic.h>
  
+ #define __apicdebuginit(type) static type __init
  struct irq_cfg {
        cpumask_t domain;
        cpumask_t old_domain;
@@@ -89,8 -89,6 +91,6 @@@ int first_system_vector = 0xfe
  
  char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
  
- #define __apicdebuginit  __init
  int sis_apic_bug; /* not actually supported, dummy for compile */
  
  static int no_timer_check;
@@@ -103,16 -101,13 +103,16 @@@ int timer_through_8259 __initdata
  static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
  
  static DEFINE_SPINLOCK(ioapic_lock);
 -DEFINE_SPINLOCK(vector_lock);
 +static DEFINE_SPINLOCK(vector_lock);
  
  /*
   * # of IRQ routing registers
   */
  int nr_ioapic_registers[MAX_IO_APICS];
  
 +/* I/O APIC RTE contents at the OS boot up */
 +struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
 +
  /* I/O APIC entries */
  struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
  int nr_ioapics;
@@@ -308,12 -303,7 +308,12 @@@ static void __target_IO_APIC_irq(unsign
                pin = entry->pin;
                if (pin == -1)
                        break;
 -              io_apic_write(apic, 0x11 + pin*2, dest);
 +              /*
 +               * With interrupt-remapping, destination information comes
 +               * from interrupt-remapping table entry.
 +               */
 +              if (!irq_remapped(irq))
 +                      io_apic_write(apic, 0x11 + pin*2, dest);
                reg = io_apic_read(apic, 0x10 + pin*2);
                reg &= ~IO_APIC_REDIR_VECTOR_MASK;
                reg |= vector;
@@@ -450,69 -440,6 +450,69 @@@ static void clear_IO_APIC (void
                        clear_IO_APIC_pin(apic, pin);
  }
  
 +/*
 + * Saves and masks all the unmasked IO-APIC RTE's
 + */
 +int save_mask_IO_APIC_setup(void)
 +{
 +      union IO_APIC_reg_01 reg_01;
 +      unsigned long flags;
 +      int apic, pin;
 +
 +      /*
 +       * The number of IO-APIC IRQ registers (== #pins):
 +       */
 +      for (apic = 0; apic < nr_ioapics; apic++) {
 +              spin_lock_irqsave(&ioapic_lock, flags);
 +              reg_01.raw = io_apic_read(apic, 1);
 +              spin_unlock_irqrestore(&ioapic_lock, flags);
 +              nr_ioapic_registers[apic] = reg_01.bits.entries+1;
 +      }
 +
 +      for (apic = 0; apic < nr_ioapics; apic++) {
 +              early_ioapic_entries[apic] =
 +                      kzalloc(sizeof(struct IO_APIC_route_entry) *
 +                              nr_ioapic_registers[apic], GFP_KERNEL);
 +              if (!early_ioapic_entries[apic])
 +                      return -ENOMEM;
 +      }
 +
 +      for (apic = 0; apic < nr_ioapics; apic++)
 +              for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
 +                      struct IO_APIC_route_entry entry;
 +
 +                      entry = early_ioapic_entries[apic][pin] =
 +                              ioapic_read_entry(apic, pin);
 +                      if (!entry.mask) {
 +                              entry.mask = 1;
 +                              ioapic_write_entry(apic, pin, entry);
 +                      }
 +              }
 +      return 0;
 +}
 +
 +void restore_IO_APIC_setup(void)
 +{
 +      int apic, pin;
 +
 +      for (apic = 0; apic < nr_ioapics; apic++)
 +              for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
 +                      ioapic_write_entry(apic, pin,
 +                                         early_ioapic_entries[apic][pin]);
 +}
 +
 +void reinit_intr_remapped_IO_APIC(int intr_remapping)
 +{
 +      /*
 +       * for now plain restore of previous settings.
 +       * TBD: In the case of OS enabling interrupt-remapping,
 +       * IO-APIC RTE's need to be setup to point to interrupt-remapping
 +       * table entries. for now, do a plain restore, and wait for
 +       * the setup_IO_APIC_irqs() to do proper initialization.
 +       */
 +      restore_IO_APIC_setup();
 +}
 +
  int skip_ioapic_setup;
  int ioapic_force;
  
@@@ -770,19 -697,6 +770,19 @@@ static int pin_2_irq(int idx, int apic
        return irq;
  }
  
 +void lock_vector_lock(void)
 +{
 +      /* Used to the online set of cpus does not change
 +       * during assign_irq_vector.
 +       */
 +      spin_lock(&vector_lock);
 +}
 +
 +void unlock_vector_lock(void)
 +{
 +      spin_unlock(&vector_lock);
 +}
 +
  static int __assign_irq_vector(int irq, cpumask_t mask)
  {
        /*
@@@ -888,7 -802,7 +888,7 @@@ static void __clear_irq_vector(int irq
        cpus_clear(cfg->domain);
  }
  
 -static void __setup_vector_irq(int cpu)
 +void __setup_vector_irq(int cpu)
  {
        /* Initialize vector_irq on a new cpu */
        /* This function must be called with vector_lock held */
        }
  }
  
 -void setup_vector_irq(int cpu)
 -{
 -      spin_lock(&vector_lock);
 -      __setup_vector_irq(smp_processor_id());
 -      spin_unlock(&vector_lock);
 -}
 -
 -
  static struct irq_chip ioapic_chip;
 +#ifdef CONFIG_INTR_REMAP
 +static struct irq_chip ir_ioapic_chip;
 +#endif
  
  static void ioapic_register_intr(int irq, unsigned long trigger)
  {
 -      if (trigger) {
 +      if (trigger)
                irq_desc[irq].status |= IRQ_LEVEL;
 -              set_irq_chip_and_handler_name(irq, &ioapic_chip,
 -                                            handle_fasteoi_irq, "fasteoi");
 -      } else {
 +      else
                irq_desc[irq].status &= ~IRQ_LEVEL;
 +
 +#ifdef CONFIG_INTR_REMAP
 +      if (irq_remapped(irq)) {
 +              irq_desc[irq].status |= IRQ_MOVE_PCNTXT;
 +              if (trigger)
 +                      set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
 +                                                    handle_fasteoi_irq,
 +                                                   "fasteoi");
 +              else
 +                      set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
 +                                                    handle_edge_irq, "edge");
 +              return;
 +      }
 +#endif
 +      if (trigger)
 +              set_irq_chip_and_handler_name(irq, &ioapic_chip,
 +                                            handle_fasteoi_irq,
 +                                            "fasteoi");
 +      else
                set_irq_chip_and_handler_name(irq, &ioapic_chip,
                                              handle_edge_irq, "edge");
 +}
 +
 +static int setup_ioapic_entry(int apic, int irq,
 +                            struct IO_APIC_route_entry *entry,
 +                            unsigned int destination, int trigger,
 +                            int polarity, int vector)
 +{
 +      /*
 +       * add it to the IO-APIC irq-routing table:
 +       */
 +      memset(entry,0,sizeof(*entry));
 +
 +#ifdef CONFIG_INTR_REMAP
 +      if (intr_remapping_enabled) {
 +              struct intel_iommu *iommu = map_ioapic_to_ir(apic);
 +              struct irte irte;
 +              struct IR_IO_APIC_route_entry *ir_entry =
 +                      (struct IR_IO_APIC_route_entry *) entry;
 +              int index;
 +
 +              if (!iommu)
 +                      panic("No mapping iommu for ioapic %d\n", apic);
 +
 +              index = alloc_irte(iommu, irq, 1);
 +              if (index < 0)
 +                      panic("Failed to allocate IRTE for ioapic %d\n", apic);
 +
 +              memset(&irte, 0, sizeof(irte));
 +
 +              irte.present = 1;
 +              irte.dst_mode = INT_DEST_MODE;
 +              irte.trigger_mode = trigger;
 +              irte.dlvry_mode = INT_DELIVERY_MODE;
 +              irte.vector = vector;
 +              irte.dest_id = IRTE_DEST(destination);
 +
 +              modify_irte(irq, &irte);
 +
 +              ir_entry->index2 = (index >> 15) & 0x1;
 +              ir_entry->zero = 0;
 +              ir_entry->format = 1;
 +              ir_entry->index = (index & 0x7fff);
 +      } else
 +#endif
 +      {
 +              entry->delivery_mode = INT_DELIVERY_MODE;
 +              entry->dest_mode = INT_DEST_MODE;
 +              entry->dest = destination;
        }
 +
 +      entry->mask = 0;                                /* enable IRQ */
 +      entry->trigger = trigger;
 +      entry->polarity = polarity;
 +      entry->vector = vector;
 +
 +      /* Mask level triggered irqs.
 +       * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
 +       */
 +      if (trigger)
 +              entry->mask = 1;
 +      return 0;
  }
  
  static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
                    apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
                    irq, trigger, polarity);
  
 -      /*
 -       * add it to the IO-APIC irq-routing table:
 -       */
 -      memset(&entry,0,sizeof(entry));
  
 -      entry.delivery_mode = INT_DELIVERY_MODE;
 -      entry.dest_mode = INT_DEST_MODE;
 -      entry.dest = cpu_mask_to_apicid(mask);
 -      entry.mask = 0;                         /* enable IRQ */
 -      entry.trigger = trigger;
 -      entry.polarity = polarity;
 -      entry.vector = cfg->vector;
 -
 -      /* Mask level triggered irqs.
 -       * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
 -       */
 -      if (trigger)
 -              entry.mask = 1;
 +      if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
 +                             cpu_mask_to_apicid(mask), trigger, polarity,
 +                             cfg->vector)) {
 +              printk("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
 +                     mp_ioapics[apic].mp_apicid, pin);
 +              __clear_irq_vector(irq);
 +              return;
 +      }
  
        ioapic_register_intr(irq, trigger);
        if (irq < 16)
@@@ -1088,9 -939,6 +1088,9 @@@ static void __init setup_timer_IRQ0_pin
  {
        struct IO_APIC_route_entry entry;
  
 +      if (intr_remapping_enabled)
 +              return;
 +
        memset(&entry, 0, sizeof(entry));
  
        /*
        ioapic_write_entry(apic, pin, entry);
  }
  
- void __apicdebuginit print_IO_APIC(void)
+ __apicdebuginit(void) print_IO_APIC(void)
  {
        int apic, i;
        union IO_APIC_reg_00 reg_00;
        return;
  }
  
- #if 0
- static __apicdebuginit void print_APIC_bitfield (int base)
+ __apicdebuginit(void) print_APIC_bitfield(int base)
  {
        unsigned int v;
        int i, j;
        }
  }
  
void __apicdebuginit print_local_APIC(void * dummy)
__apicdebuginit(void) print_local_APIC(void *dummy)
  {
        unsigned int v, ver, maxlvt;
 +      unsigned long icr;
  
        if (apic_verbosity == APIC_QUIET)
                return;
        printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
                smp_processor_id(), hard_smp_processor_id());
        v = apic_read(APIC_ID);
 -      printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, GET_APIC_ID(read_apic_id()));
 +      printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, read_apic_id());
        v = apic_read(APIC_LVR);
        printk(KERN_INFO "... APIC VERSION: %08x\n", v);
        ver = GET_APIC_VERSION(v);
        v = apic_read(APIC_ESR);
        printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
  
 -      v = apic_read(APIC_ICR);
 -      printk(KERN_DEBUG "... APIC ICR: %08x\n", v);
 -      v = apic_read(APIC_ICR2);
 -      printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
 +      icr = apic_icr_read();
 +      printk(KERN_DEBUG "... APIC ICR: %08x\n", icr);
 +      printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32);
  
        v = apic_read(APIC_LVTT);
        printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
        printk("\n");
  }
  
void print_all_local_APICs (void)
__apicdebuginit(void) print_all_local_APICs(void)
  {
        on_each_cpu(print_local_APIC, NULL, 1);
  }
  
void __apicdebuginit print_PIC(void)
__apicdebuginit(void) print_PIC(void)
  {
        unsigned int v;
        unsigned long flags;
        printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
  }
  
- #endif  /*  0  */
+ __apicdebuginit(int) print_all_ICs(void)
+ {
+       print_PIC();
+       print_all_local_APICs();
+       print_IO_APIC();
+       return 0;
+ }
+ fs_initcall(print_all_ICs);
  
  void __init enable_IO_APIC(void)
  {
@@@ -1438,7 -1295,7 +1447,7 @@@ void disable_IO_APIC(void
                entry.dest_mode       = 0; /* Physical */
                entry.delivery_mode   = dest_ExtINT; /* ExtInt */
                entry.vector          = 0;
 -              entry.dest          = GET_APIC_ID(read_apic_id());
 +              entry.dest            = read_apic_id();
  
                /*
                 * Add it to the IO-APIC irq-routing table:
@@@ -1544,147 -1401,6 +1553,147 @@@ static int ioapic_retrigger_irq(unsigne
   */
  
  #ifdef CONFIG_SMP
 +
 +#ifdef CONFIG_INTR_REMAP
 +static void ir_irq_migration(struct work_struct *work);
 +
 +static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
 +
 +/*
 + * Migrate the IO-APIC irq in the presence of intr-remapping.
 + *
 + * For edge triggered, irq migration is a simple atomic update(of vector
 + * and cpu destination) of IRTE and flush the hardware cache.
 + *
 + * For level triggered, we need to modify the io-apic RTE as well with the updated
 + * vector information, along with modifying IRTE with vector and destination.
 + * So irq migration for level triggered is a little bit more complex compared to
 + * edge triggered migration. But the good news is, we use the same algorithm
 + * for level triggered migration as we have today, only difference being,
 + * we now initiate the irq migration from process context instead of the
 + * interrupt context.
 + *
 + * In future, when we do a directed EOI (combined with cpu EOI broadcast
 + * suppression) to the IO-APIC, level triggered irq migration will also be
 + * as simple as edge triggered migration and we can do the irq migration
 + * with a simple atomic update to IO-APIC RTE.
 + */
 +static void migrate_ioapic_irq(int irq, cpumask_t mask)
 +{
 +      struct irq_cfg *cfg = irq_cfg + irq;
 +      struct irq_desc *desc = irq_desc + irq;
 +      cpumask_t tmp, cleanup_mask;
 +      struct irte irte;
 +      int modify_ioapic_rte = desc->status & IRQ_LEVEL;
 +      unsigned int dest;
 +      unsigned long flags;
 +
 +      cpus_and(tmp, mask, cpu_online_map);
 +      if (cpus_empty(tmp))
 +              return;
 +
 +      if (get_irte(irq, &irte))
 +              return;
 +
 +      if (assign_irq_vector(irq, mask))
 +              return;
 +
 +      cpus_and(tmp, cfg->domain, mask);
 +      dest = cpu_mask_to_apicid(tmp);
 +
 +      if (modify_ioapic_rte) {
 +              spin_lock_irqsave(&ioapic_lock, flags);
 +              __target_IO_APIC_irq(irq, dest, cfg->vector);
 +              spin_unlock_irqrestore(&ioapic_lock, flags);
 +      }
 +
 +      irte.vector = cfg->vector;
 +      irte.dest_id = IRTE_DEST(dest);
 +
 +      /*
 +       * Modify the IRTE and flush the Interrupt entry cache.
 +       */
 +      modify_irte(irq, &irte);
 +
 +      if (cfg->move_in_progress) {
 +              cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
 +              cfg->move_cleanup_count = cpus_weight(cleanup_mask);
 +              send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
 +              cfg->move_in_progress = 0;
 +      }
 +
 +      irq_desc[irq].affinity = mask;
 +}
 +
 +static int migrate_irq_remapped_level(int irq)
 +{
 +      int ret = -1;
 +
 +      mask_IO_APIC_irq(irq);
 +
 +      if (io_apic_level_ack_pending(irq)) {
 +              /*
 +               * Interrupt in progress. Migrating irq now will change the
 +               * vector information in the IO-APIC RTE and that will confuse
 +               * the EOI broadcast performed by cpu.
 +               * So, delay the irq migration to the next instance.
 +               */
 +              schedule_delayed_work(&ir_migration_work, 1);
 +              goto unmask;
 +      }
 +
 +      /* everything is clear. we have the right of way */
 +      migrate_ioapic_irq(irq, irq_desc[irq].pending_mask);
 +
 +      ret = 0;
 +      irq_desc[irq].status &= ~IRQ_MOVE_PENDING;
 +      cpus_clear(irq_desc[irq].pending_mask);
 +
 +unmask:
 +      unmask_IO_APIC_irq(irq);
 +      return ret;
 +}
 +
 +static void ir_irq_migration(struct work_struct *work)
 +{
 +      int irq;
 +
 +      for (irq = 0; irq < NR_IRQS; irq++) {
 +              struct irq_desc *desc = irq_desc + irq;
 +              if (desc->status & IRQ_MOVE_PENDING) {
 +                      unsigned long flags;
 +
 +                      spin_lock_irqsave(&desc->lock, flags);
 +                      if (!desc->chip->set_affinity ||
 +                          !(desc->status & IRQ_MOVE_PENDING)) {
 +                              desc->status &= ~IRQ_MOVE_PENDING;
 +                              spin_unlock_irqrestore(&desc->lock, flags);
 +                              continue;
 +                      }
 +
 +                      desc->chip->set_affinity(irq,
 +                                               irq_desc[irq].pending_mask);
 +                      spin_unlock_irqrestore(&desc->lock, flags);
 +              }
 +      }
 +}
 +
 +/*
 + * Migrates the IRQ destination in the process context.
 + */
 +static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 +{
 +      if (irq_desc[irq].status & IRQ_LEVEL) {
 +              irq_desc[irq].status |= IRQ_MOVE_PENDING;
 +              irq_desc[irq].pending_mask = mask;
 +              migrate_irq_remapped_level(irq);
 +              return;
 +      }
 +
 +      migrate_ioapic_irq(irq, mask);
 +}
 +#endif
 +
  asmlinkage void smp_irq_move_cleanup_interrupt(void)
  {
        unsigned vector, me;
@@@ -1741,17 -1457,6 +1750,17 @@@ static void irq_complete_move(unsigned 
  #else
  static inline void irq_complete_move(unsigned int irq) {}
  #endif
 +#ifdef CONFIG_INTR_REMAP
 +static void ack_x2apic_level(unsigned int irq)
 +{
 +      ack_x2APIC_irq();
 +}
 +
 +static void ack_x2apic_edge(unsigned int irq)
 +{
 +      ack_x2APIC_irq();
 +}
 +#endif
  
  static void ack_apic_edge(unsigned int irq)
  {
@@@ -1826,21 -1531,6 +1835,21 @@@ static struct irq_chip ioapic_chip __re
        .retrigger      = ioapic_retrigger_irq,
  };
  
 +#ifdef CONFIG_INTR_REMAP
 +static struct irq_chip ir_ioapic_chip __read_mostly = {
 +      .name           = "IR-IO-APIC",
 +      .startup        = startup_ioapic_irq,
 +      .mask           = mask_IO_APIC_irq,
 +      .unmask         = unmask_IO_APIC_irq,
 +      .ack            = ack_x2apic_edge,
 +      .eoi            = ack_x2apic_level,
 +#ifdef CONFIG_SMP
 +      .set_affinity   = set_ir_ioapic_affinity_irq,
 +#endif
 +      .retrigger      = ioapic_retrigger_irq,
 +};
 +#endif
 +
  static inline void init_IO_APIC_traps(void)
  {
        int irq;
@@@ -2026,8 -1716,6 +2035,8 @@@ static inline void __init check_timer(v
         * 8259A.
         */
        if (pin1 == -1) {
 +              if (intr_remapping_enabled)
 +                      panic("BIOS bug: timer not connected to IO-APIC");
                pin1 = pin2;
                apic1 = apic2;
                no_pin1 = 1;
                                clear_IO_APIC_pin(0, pin1);
                        goto out;
                }
 +              if (intr_remapping_enabled)
 +                      panic("timer doesn't work through Interrupt-remapped IO-APIC");
                clear_IO_APIC_pin(apic1, pin1);
                if (!no_pin1)
                        apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
@@@ -2172,8 -1858,6 +2181,6 @@@ void __init setup_IO_APIC(void
        setup_IO_APIC_irqs();
        init_IO_APIC_traps();
        check_timer();
-       if (!acpi_ioapic)
-               print_IO_APIC();
  }
  
  struct sysfs_ioapic_data {
@@@ -2295,9 -1979,6 +2302,9 @@@ void destroy_irq(unsigned int irq
  
        dynamic_irq_cleanup(irq);
  
 +#ifdef CONFIG_INTR_REMAP
 +      free_irte(irq);
 +#endif
        spin_lock_irqsave(&vector_lock, flags);
        __clear_irq_vector(irq);
        spin_unlock_irqrestore(&vector_lock, flags);
@@@ -2316,41 -1997,10 +2323,41 @@@ static int msi_compose_msg(struct pci_d
  
        tmp = TARGET_CPUS;
        err = assign_irq_vector(irq, tmp);
 -      if (!err) {
 -              cpus_and(tmp, cfg->domain, tmp);
 -              dest = cpu_mask_to_apicid(tmp);
 +      if (err)
 +              return err;
 +
 +      cpus_and(tmp, cfg->domain, tmp);
 +      dest = cpu_mask_to_apicid(tmp);
 +
 +#ifdef CONFIG_INTR_REMAP
 +      if (irq_remapped(irq)) {
 +              struct irte irte;
 +              int ir_index;
 +              u16 sub_handle;
 +
 +              ir_index = map_irq_to_irte_handle(irq, &sub_handle);
 +              BUG_ON(ir_index == -1);
 +
 +              memset (&irte, 0, sizeof(irte));
 +
 +              irte.present = 1;
 +              irte.dst_mode = INT_DEST_MODE;
 +              irte.trigger_mode = 0; /* edge */
 +              irte.dlvry_mode = INT_DELIVERY_MODE;
 +              irte.vector = cfg->vector;
 +              irte.dest_id = IRTE_DEST(dest);
 +
 +              modify_irte(irq, &irte);
  
 +              msg->address_hi = MSI_ADDR_BASE_HI;
 +              msg->data = sub_handle;
 +              msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
 +                                MSI_ADDR_IR_SHV |
 +                                MSI_ADDR_IR_INDEX1(ir_index) |
 +                                MSI_ADDR_IR_INDEX2(ir_index);
 +      } else
 +#endif
 +      {
                msg->address_hi = MSI_ADDR_BASE_HI;
                msg->address_lo =
                        MSI_ADDR_BASE_LO |
@@@ -2401,55 -2051,6 +2408,55 @@@ static void set_msi_irq_affinity(unsign
        write_msi_msg(irq, &msg);
        irq_desc[irq].affinity = mask;
  }
 +
 +#ifdef CONFIG_INTR_REMAP
 +/*
 + * Migrate the MSI irq to another cpumask. This migration is
 + * done in the process context using interrupt-remapping hardware.
 + */
 +static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 +{
 +      struct irq_cfg *cfg = irq_cfg + irq;
 +      unsigned int dest;
 +      cpumask_t tmp, cleanup_mask;
 +      struct irte irte;
 +
 +      cpus_and(tmp, mask, cpu_online_map);
 +      if (cpus_empty(tmp))
 +              return;
 +
 +      if (get_irte(irq, &irte))
 +              return;
 +
 +      if (assign_irq_vector(irq, mask))
 +              return;
 +
 +      cpus_and(tmp, cfg->domain, mask);
 +      dest = cpu_mask_to_apicid(tmp);
 +
 +      irte.vector = cfg->vector;
 +      irte.dest_id = IRTE_DEST(dest);
 +
 +      /*
 +       * atomically update the IRTE with the new destination and vector.
 +       */
 +      modify_irte(irq, &irte);
 +
 +      /*
 +       * After this point, all the interrupts will start arriving
 +       * at the new destination. So, time to cleanup the previous
 +       * vector allocation.
 +       */
 +      if (cfg->move_in_progress) {
 +              cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
 +              cfg->move_cleanup_count = cpus_weight(cleanup_mask);
 +              send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
 +              cfg->move_in_progress = 0;
 +      }
 +
 +      irq_desc[irq].affinity = mask;
 +}
 +#endif
  #endif /* CONFIG_SMP */
  
  /*
@@@ -2467,157 -2068,26 +2474,157 @@@ static struct irq_chip msi_chip = 
        .retrigger      = ioapic_retrigger_irq,
  };
  
 -int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
 +#ifdef CONFIG_INTR_REMAP
 +static struct irq_chip msi_ir_chip = {
 +      .name           = "IR-PCI-MSI",
 +      .unmask         = unmask_msi_irq,
 +      .mask           = mask_msi_irq,
 +      .ack            = ack_x2apic_edge,
 +#ifdef CONFIG_SMP
 +      .set_affinity   = ir_set_msi_irq_affinity,
 +#endif
 +      .retrigger      = ioapic_retrigger_irq,
 +};
 +
 +/*
 + * Map the PCI dev to the corresponding remapping hardware unit
 + * and allocate 'nvec' consecutive interrupt-remapping table entries
 + * in it.
 + */
 +static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
 +{
 +      struct intel_iommu *iommu;
 +      int index;
 +
 +      iommu = map_dev_to_ir(dev);
 +      if (!iommu) {
 +              printk(KERN_ERR
 +                     "Unable to map PCI %s to iommu\n", pci_name(dev));
 +              return -ENOENT;
 +      }
 +
 +      index = alloc_irte(iommu, irq, nvec);
 +      if (index < 0) {
 +              printk(KERN_ERR
 +                     "Unable to allocate %d IRTE for PCI %s\n", nvec,
 +                      pci_name(dev));
 +              return -ENOSPC;
 +      }
 +      return index;
 +}
 +#endif
 +
 +static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
  {
 +      int ret;
        struct msi_msg msg;
 +
 +      ret = msi_compose_msg(dev, irq, &msg);
 +      if (ret < 0)
 +              return ret;
 +
 +      set_irq_msi(irq, desc);
 +      write_msi_msg(irq, &msg);
 +
 +#ifdef CONFIG_INTR_REMAP
 +      if (irq_remapped(irq)) {
 +              struct irq_desc *desc = irq_desc + irq;
 +              /*
 +               * irq migration in process context
 +               */
 +              desc->status |= IRQ_MOVE_PCNTXT;
 +              set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
 +      } else
 +#endif
 +              set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
 +
 +      return 0;
 +}
 +
 +int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
 +{
        int irq, ret;
 +
        irq = create_irq();
        if (irq < 0)
                return irq;
  
 -      ret = msi_compose_msg(dev, irq, &msg);
 +#ifdef CONFIG_INTR_REMAP
 +      if (!intr_remapping_enabled)
 +              goto no_ir;
 +
 +      ret = msi_alloc_irte(dev, irq, 1);
 +      if (ret < 0)
 +              goto error;
 +no_ir:
 +#endif
 +      ret = setup_msi_irq(dev, desc, irq);
        if (ret < 0) {
                destroy_irq(irq);
                return ret;
        }
 +      return 0;
  
 -      set_irq_msi(irq, desc);
 -      write_msi_msg(irq, &msg);
 +#ifdef CONFIG_INTR_REMAP
 +error:
 +      destroy_irq(irq);
 +      return ret;
 +#endif
 +}
  
 -      set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
 +int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 +{
 +      int irq, ret, sub_handle;
 +      struct msi_desc *desc;
 +#ifdef CONFIG_INTR_REMAP
 +      struct intel_iommu *iommu = 0;
 +      int index = 0;
 +#endif
  
 +      sub_handle = 0;
 +      list_for_each_entry(desc, &dev->msi_list, list) {
 +              irq = create_irq();
 +              if (irq < 0)
 +                      return irq;
 +#ifdef CONFIG_INTR_REMAP
 +              if (!intr_remapping_enabled)
 +                      goto no_ir;
 +
 +              if (!sub_handle) {
 +                      /*
 +                       * allocate the consecutive block of IRTE's
 +                       * for 'nvec'
 +                       */
 +                      index = msi_alloc_irte(dev, irq, nvec);
 +                      if (index < 0) {
 +                              ret = index;
 +                              goto error;
 +                      }
 +              } else {
 +                      iommu = map_dev_to_ir(dev);
 +                      if (!iommu) {
 +                              ret = -ENOENT;
 +                              goto error;
 +                      }
 +                      /*
 +                       * setup the mapping between the irq and the IRTE
 +                       * base index, the sub_handle pointing to the
 +                       * appropriate interrupt remap table entry.
 +                       */
 +                      set_irte_irq(irq, iommu, index, sub_handle);
 +              }
 +no_ir:
 +#endif
 +              ret = setup_msi_irq(dev, desc, irq);
 +              if (ret < 0)
 +                      goto error;
 +              sub_handle++;
 +      }
        return 0;
 +
 +error:
 +      destroy_irq(irq);
 +      return ret;
  }
  
  void arch_teardown_msi_irq(unsigned int irq)
@@@ -2865,10 -2335,6 +2872,10 @@@ void __init setup_ioapic_dest(void
                                setup_IO_APIC_irq(ioapic, pin, irq,
                                                  irq_trigger(irq_entry),
                                                  irq_polarity(irq_entry));
 +#ifdef CONFIG_INTR_REMAP
 +                      else if (intr_remapping_enabled)
 +                              set_ir_ioapic_affinity_irq(irq, TARGET_CPUS);
 +#endif
                        else
                                set_ioapic_affinity_irq(irq, TARGET_CPUS);
                }
diff --combined include/asm-x86/hw_irq.h
index 278571a64c0e5836948402d61a1683cd54d9064a,83e0048dca7804fa10393567ae740d794b89f020..50f6e0316b5029c4c2e5801bcd256119c3f037aa
@@@ -1,5 -1,5 +1,5 @@@
 -#ifndef _ASM_HW_IRQ_H
 -#define _ASM_HW_IRQ_H
 +#ifndef ASM_X86__HW_IRQ_H
 +#define ASM_X86__HW_IRQ_H
  
  /*
   * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar
@@@ -64,7 -64,6 +64,6 @@@ extern unsigned long io_apic_irqs
  extern void init_VISWS_APIC_irqs(void);
  extern void setup_IO_APIC(void);
  extern void disable_IO_APIC(void);
- extern void print_IO_APIC(void);
  extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn);
  extern void setup_ioapic_dest(void);
  
@@@ -73,9 -72,7 +72,9 @@@ extern void enable_IO_APIC(void)
  #endif
  
  /* IPI functions */
 +#ifdef CONFIG_X86_32
  extern void send_IPI_self(int vector);
 +#endif
  extern void send_IPI(int dest, int vector);
  
  /* Statistics */
@@@ -95,43 -92,15 +94,43 @@@ extern asmlinkage void qic_reschedule_i
  extern asmlinkage void qic_enable_irq_interrupt(void);
  extern asmlinkage void qic_call_function_interrupt(void);
  
 +/* SMP */
 +extern void smp_apic_timer_interrupt(struct pt_regs *);
 +#ifdef CONFIG_X86_32
 +extern void smp_spurious_interrupt(struct pt_regs *);
 +extern void smp_error_interrupt(struct pt_regs *);
 +#else
 +extern asmlinkage void smp_spurious_interrupt(void);
 +extern asmlinkage void smp_error_interrupt(void);
 +#endif
 +#ifdef CONFIG_X86_SMP
 +extern void smp_reschedule_interrupt(struct pt_regs *);
 +extern void smp_call_function_interrupt(struct pt_regs *);
 +extern void smp_call_function_single_interrupt(struct pt_regs *);
 +#ifdef CONFIG_X86_32
 +extern void smp_invalidate_interrupt(struct pt_regs *);
 +#else
 +extern asmlinkage void smp_invalidate_interrupt(struct pt_regs *);
 +#endif
 +#endif
 +
  #ifdef CONFIG_X86_32
  extern void (*const interrupt[NR_IRQS])(void);
  #else
  typedef int vector_irq_t[NR_VECTORS];
  DECLARE_PER_CPU(vector_irq_t, vector_irq);
 -extern spinlock_t vector_lock;
  #endif
 -extern void setup_vector_irq(int cpu);
 +
 +#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_X86_64)
 +extern void lock_vector_lock(void);
 +extern void unlock_vector_lock(void);
 +extern void __setup_vector_irq(int cpu);
 +#else
 +static inline void lock_vector_lock(void) {}
 +static inline void unlock_vector_lock(void) {}
 +static inline void __setup_vector_irq(int cpu) {}
 +#endif
  
  #endif /* !ASSEMBLY_ */
  
 -#endif
 +#endif /* ASM_X86__HW_IRQ_H */