Merge branches 'x86/urgent', 'x86/amd-iommu', 'x86/apic', 'x86/cleanups', 'x86/core...
Ingo Molnar [Mon, 21 Jul 2008 14:37:17 +0000 (16:37 +0200)]
26 files changed:
Documentation/kernel-parameters.txt
arch/x86/Kconfig.debug
arch/x86/ia32/ia32entry.S
arch/x86/kernel/amd_iommu.c
arch/x86/kernel/amd_iommu_init.c
arch/x86/kernel/apic_32.c
arch/x86/kernel/apic_64.c
arch/x86/kernel/cpu/common_64.c
arch/x86/kernel/early-quirks.c
arch/x86/kernel/entry_32.S
arch/x86/kernel/entry_64.S
arch/x86/kernel/nmi.c
arch/x86/kernel/paravirt.c
arch/x86/kernel/pci-dma.c
arch/x86/kernel/pci-gart_64.c
arch/x86/kernel/process.c
arch/x86/kernel/setup.c
arch/x86/kernel/signal_32.c
arch/x86/kernel/smpboot.c
arch/x86/mm/init_32.c
arch/x86/mm/pat.c
arch/x86/pci/pci.h
arch/x86/xen/enlighten.c
drivers/pci/intel-iommu.c
include/asm-x86/paravirt.h
include/asm-x86/setup.h

Simple merge
Simple merge
                        .quad 1b,ia32_badarg
                        .previous       
                        GET_THREAD_INFO(%r10)
          -             orl    $TS_COMPAT,threadinfo_status(%r10)
          -             testl  $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10)
          +             orl    $TS_COMPAT,TI_status(%r10)
---------- ---- ----    testl  $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
---------- ---- ----             TI_flags(%r10)
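+++++++++++++++ ++++    /* _TIF_WORK_SYSCALL_ENTRY replaces the open-coded TRACE|AUDIT|SECCOMP test */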
+++++++++++++++ ++++    testl  $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
                        CFI_REMEMBER_STATE
                        jnz  sysenter_tracesys
--------------- ----sysenter_do_call:   
                        cmpl    $(IA32_NR_syscalls-1),%eax
                        ja      ia32_badsys
+++++++++++++++ ++++sysenter_do_call:
                        IA32_ARG_FIXUP 1
                        call    *ia32_sys_call_table(,%rax,8)
                        movq    %rax,RAX-ARGOFFSET(%rsp)
@@@@@@@@@@@@@@@@@@@@@ -241,9 -241,9 -241,9 -241,9 -241,9 -241,9 -241,9 -241,9 -241,9 -241,9 -230,8 -241,9 -241,9 -241,9 -241,9 -244,8 -241,9 -241,9 -241,9 -241,9 +244,8 @@@@@@@@@@@@@@@@@@@@@ ENTRY(ia32_cstar_target
                        .quad 1b,ia32_badarg
                        .previous       
                        GET_THREAD_INFO(%r10)
          -             orl   $TS_COMPAT,threadinfo_status(%r10)
          -             testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10)
          +             orl   $TS_COMPAT,TI_status(%r10)
---------- ---- ----    testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
---------- ---- ----            TI_flags(%r10)
+++++++++++++++ ++++    testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
                        CFI_REMEMBER_STATE
                        jnz   cstar_tracesys
                    cstar_do_call:      
@@@@@@@@@@@@@@@@@@@@@ -321,7 -321,7 -321,7 -321,7 -321,7 -321,7 -321,7 -321,7 -321,7 -321,7 -310,7 -321,7 -321,7 -321,7 -321,7 -323,7 -321,7 -321,7 -321,7 -321,8 +323,8 @@@@@@@@@@@@@@@@@@@@@ ENTRY(ia32_syscall
                        /*CFI_REL_OFFSET        rflags,EFLAGS-RIP*/
                        /*CFI_REL_OFFSET        cs,CS-RIP*/
                        CFI_REL_OFFSET  rip,RIP-RIP
          -             swapgs
+++++++++++++++++++     PARAVIRT_ADJUST_EXCEPTION_FRAME
          +             SWAPGS
                        /*
                         * No need to follow this irqs on/off section: the syscall
                         * disabled irqs and here we enable it straight after entry:
                           this could be a problem. */
                        SAVE_ARGS 0,0,1
                        GET_THREAD_INFO(%r10)
          -             orl   $TS_COMPAT,threadinfo_status(%r10)
          -             testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10)
          +             orl   $TS_COMPAT,TI_status(%r10)
---------- ---- ----    testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
---------- ---- ----            TI_flags(%r10)
+++++++++++++++ ++++    testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
                        jnz ia32_tracesys
                    ia32_do_syscall:    
                        cmpl $(IA32_NR_syscalls-1),%eax
index f2766d8,8c3deb0,f2766d8,f2766d8,f2766d8,f2766d8,f2766d8,cf2f74b,f2766d8,f2766d8,0000000,f2766d8,f2766d8,f2766d8,f2766d8,f2766d8,f2766d8,f2766d8,f2766d8,f2766d8..c25210e
mode 100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,000000,100644,100644,100644,100644,100644,100644,100644,100644,100644..100644
--- /dev/null
          +         /*
          +          * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
          +          * Author: Joerg Roedel <joerg.roedel@amd.com>
          +          *         Leo Duran <leo.duran@amd.com>
          +          *
          +          * This program is free software; you can redistribute it and/or modify it
          +          * under the terms of the GNU General Public License version 2 as published
          +          * by the Free Software Foundation.
          +          *
          +          * This program is distributed in the hope that it will be useful,
          +          * but WITHOUT ANY WARRANTY; without even the implied warranty of
          +          * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
          +          * GNU General Public License for more details.
          +          *
          +          * You should have received a copy of the GNU General Public License
          +          * along with this program; if not, write to the Free Software
          +          * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
          +          */
          +         
          +         #include <linux/pci.h>
          +         #include <linux/gfp.h>
          +         #include <linux/bitops.h>
          +         #include <linux/scatterlist.h>
          +         #include <linux/iommu-helper.h>
          +         #include <asm/proto.h>
------- -- ---------#include <asm/gart.h>
+++++++ ++++++++++++#include <asm/iommu.h>
          +         #include <asm/amd_iommu_types.h>
          +         #include <asm/amd_iommu.h>
          +         
          +         #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
          +         
          +         #define to_pages(addr, size) \
          +              (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
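           +         /* e.g. to_pages(0x1800, 0x2000): round_up(0x800 + 0x2000, 4kb) >> 12 = 3 pages */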
          +         
+ ++++++++++++++++++#define EXIT_LOOP_COUNT 10000000
+ ++++++++++++++++++
          +         static DEFINE_RWLOCK(amd_iommu_devtable_lock);
          +         
- -------- ---------struct command {
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * general struct to manage commands sent to an IOMMU
+ ++++++++++++++++++ */
+ ++++++++++++++++++struct iommu_cmd {
          +             u32 data[4];
          +         };
          +         
          +         static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
          +                                  struct unity_map_entry *e);
          +         
+ ++++++++++++++++++/* returns !0 if the IOMMU is caching non-present entries in its TLB */
          +         static int iommu_has_npcache(struct amd_iommu *iommu)
          +         {
          +             return iommu->cap & IOMMU_CAP_NPCACHE;
          +         }
          +         
- -------- ---------static int __iommu_queue_command(struct amd_iommu *iommu, struct command *cmd)
+ ++++++++++++++++++/****************************************************************************
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * IOMMU command queuing functions
+ ++++++++++++++++++ *
+ ++++++++++++++++++ ****************************************************************************/
+ ++++++++++++++++++
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Writes the command to the IOMMU's command buffer and informs the
+ ++++++++++++++++++ * hardware about the new command. Must be called with iommu->lock held.
+ ++++++++++++++++++ */
+ ++++++++++++++++++static int __iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
          +         {
          +             u32 tail, head;
          +             u8 *target;
          +         
          +             tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
          +             target = (iommu->cmd_buf + tail);
          +             memcpy_toio(target, cmd, sizeof(*cmd));
          +             tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
          +             head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
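           +             /* the command buffer is full if the new tail would catch up with the head */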
          +             if (tail == head)
          +                     return -ENOMEM;
          +             writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
          +         
          +             return 0;
          +         }
          +         
- -------- ---------static int iommu_queue_command(struct amd_iommu *iommu, struct command *cmd)
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * General queuing function for commands. Takes iommu->lock and calls
+ ++++++++++++++++++ * __iommu_queue_command().
+ ++++++++++++++++++ */
+ ++++++++++++++++++static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
          +         {
          +             unsigned long flags;
          +             int ret;
          +         
          +             spin_lock_irqsave(&iommu->lock, flags);
          +             ret = __iommu_queue_command(iommu, cmd);
          +             spin_unlock_irqrestore(&iommu->lock, flags);
          +         
          +             return ret;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * This function is called whenever we need to ensure that the IOMMU has
+ ++++++++++++++++++ * completed execution of all commands we sent. It sends a
+ ++++++++++++++++++ * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs
+ ++++++++++++++++++ * us about that by writing a value to a physical address we pass with
+ ++++++++++++++++++ * the command.
+ ++++++++++++++++++ */
          +         static int iommu_completion_wait(struct amd_iommu *iommu)
          +         {
          +             int ret;
- -------- ---------    struct command cmd;
+ ++++++++++++++++++    struct iommu_cmd cmd;
          +             volatile u64 ready = 0;
          +             unsigned long ready_phys = virt_to_phys(&ready);
+ ++++++++++++++++++    unsigned long i = 0;
          +         
          +             memset(&cmd, 0, sizeof(cmd));
          +             cmd.data[0] = LOW_U32(ready_phys) | CMD_COMPL_WAIT_STORE_MASK;
- -------- ---------    cmd.data[1] = HIGH_U32(ready_phys);
+ ++++++++++++++++++    cmd.data[1] = upper_32_bits(ready_phys);
          +             cmd.data[2] = 1; /* value written to 'ready' */
          +             CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
          +         
          +             iommu->need_sync = 0;
          +         
          +             ret = iommu_queue_command(iommu, &cmd);
          +         
          +             if (ret)
          +                     return ret;
          +         
- -------- ---------    while (!ready)
+ ++++++++++++++++++    while (!ready && (i < EXIT_LOOP_COUNT)) {
+ ++++++++++++++++++            ++i;
          +                     cpu_relax();
+ ++++++++++++++++++    }
+ ++++++++++++++++++
+ ++++++++++++++++++    if (unlikely((i == EXIT_LOOP_COUNT) && printk_ratelimit()))
+ ++++++++++++++++++            printk(KERN_WARNING "AMD IOMMU: Completion wait loop failed\n");
          +         
          +             return 0;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Command send function for invalidating a device table entry
+ ++++++++++++++++++ */
          +         static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
          +         {
- -------- ---------    struct command cmd;
+ ++++++++++++++++++    struct iommu_cmd cmd;
          +         
          +             BUG_ON(iommu == NULL);
          +         
          +             memset(&cmd, 0, sizeof(cmd));
          +             CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY);
          +             cmd.data[0] = devid;
          +         
          +             iommu->need_sync = 1;
          +         
          +             return iommu_queue_command(iommu, &cmd);
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Generic command send function for invalidating TLB entries
+ ++++++++++++++++++ */
          +         static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
          +                     u64 address, u16 domid, int pde, int s)
          +         {
- -------- ---------    struct command cmd;
+ ++++++++++++++++++    struct iommu_cmd cmd;
          +         
          +             memset(&cmd, 0, sizeof(cmd));
          +             address &= PAGE_MASK;
          +             CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES);
          +             cmd.data[1] |= domid;
          +             cmd.data[2] = LOW_U32(address);
- -------- ---------    cmd.data[3] = HIGH_U32(address);
- -------- ---------    if (s)
+ ++++++++++++++++++    cmd.data[3] = upper_32_bits(address);
+ ++++++++++++++++++    if (s) /* size bit - we flush more than one 4kb page */
          +                     cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
- -------- ---------    if (pde)
+ ++++++++++++++++++    if (pde) /* PDE bit - we want to flush everything, not only the PTEs */
          +                     cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
          +         
          +             iommu->need_sync = 1;
          +         
          +             return iommu_queue_command(iommu, &cmd);
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * TLB invalidation function which is called from the mapping functions.
+ ++++++++++++++++++ * It invalidates a single PTE if the range to flush is within a single
+ ++++++++++++++++++ * page. Otherwise it flushes the whole TLB of the IOMMU.
+ ++++++++++++++++++ */
          +         static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid,
          +                     u64 address, size_t size)
          +         {
          +             int s = 0;
          +             unsigned pages = to_pages(address, size);
          +         
          +             address &= PAGE_MASK;
          +         
          +             if (pages > 1) {
          +                     /*
          +                      * If we have to flush more than one page, flush all
          +                      * TLB entries for this domain
          +                      */
          +                     address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
          +                     s = 1;
          +             }
          +         
          +             iommu_queue_inv_iommu_pages(iommu, address, domid, 0, s);
          +         
          +             return 0;
          +         }
          +         
+ ++++++++++++++++++/****************************************************************************
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * The functions below are used to create the page table mappings for
+ ++++++++++++++++++ * unity mapped regions.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ ****************************************************************************/
+ ++++++++++++++++++
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Generic mapping function. It maps a physical address into a DMA
+ ++++++++++++++++++ * address space. It allocates the page table pages if necessary.
+ ++++++++++++++++++ * In the future it can be extended to a generic mapping function
+ ++++++++++++++++++ * supporting all features of AMD IOMMU page tables like level skipping
+ ++++++++++++++++++ * and full 64 bit address spaces.
+ ++++++++++++++++++ */
          +         static int iommu_map(struct protection_domain *dom,
          +                          unsigned long bus_addr,
          +                          unsigned long phys_addr,
          +                          int prot)
          +         {
          +             u64 __pte, *pte, *page;
          +         
          +             bus_addr  = PAGE_ALIGN(bus_addr);
           +             phys_addr = PAGE_ALIGN(phys_addr);
          +         
          +             /* only support 512GB address spaces for now */
          +             if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK))
          +                     return -EINVAL;
          +         
          +             pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)];
          +         
          +             if (!IOMMU_PTE_PRESENT(*pte)) {
          +                     page = (u64 *)get_zeroed_page(GFP_KERNEL);
          +                     if (!page)
          +                             return -ENOMEM;
          +                     *pte = IOMMU_L2_PDE(virt_to_phys(page));
          +             }
          +         
          +             pte = IOMMU_PTE_PAGE(*pte);
          +             pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];
          +         
          +             if (!IOMMU_PTE_PRESENT(*pte)) {
          +                     page = (u64 *)get_zeroed_page(GFP_KERNEL);
          +                     if (!page)
          +                             return -ENOMEM;
          +                     *pte = IOMMU_L1_PDE(virt_to_phys(page));
          +             }
          +         
          +             pte = IOMMU_PTE_PAGE(*pte);
          +             pte = &pte[IOMMU_PTE_L0_INDEX(bus_addr)];
          +         
          +             if (IOMMU_PTE_PRESENT(*pte))
          +                     return -EBUSY;
          +         
          +             __pte = phys_addr | IOMMU_PTE_P;
          +             if (prot & IOMMU_PROT_IR)
          +                     __pte |= IOMMU_PTE_IR;
          +             if (prot & IOMMU_PROT_IW)
          +                     __pte |= IOMMU_PTE_IW;
          +         
          +             *pte = __pte;
          +         
          +             return 0;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * This function checks if a specific unity mapping entry is needed for
+ ++++++++++++++++++ * this specific IOMMU.
+ ++++++++++++++++++ */
          +         static int iommu_for_unity_map(struct amd_iommu *iommu,
          +                                    struct unity_map_entry *entry)
          +         {
          +             u16 bdf, i;
          +         
          +             for (i = entry->devid_start; i <= entry->devid_end; ++i) {
          +                     bdf = amd_iommu_alias_table[i];
          +                     if (amd_iommu_rlookup_table[bdf] == iommu)
          +                             return 1;
          +             }
          +         
          +             return 0;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Init the unity mappings for a specific IOMMU in the system
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * Basically iterates over all unity mapping entries and applies them to
+ ++++++++++++++++++ * the default DMA domain of that IOMMU if necessary.
+ ++++++++++++++++++ */
          +         static int iommu_init_unity_mappings(struct amd_iommu *iommu)
          +         {
          +             struct unity_map_entry *entry;
          +             int ret;
          +         
          +             list_for_each_entry(entry, &amd_iommu_unity_map, list) {
          +                     if (!iommu_for_unity_map(iommu, entry))
          +                             continue;
          +                     ret = dma_ops_unity_map(iommu->default_dom, entry);
          +                     if (ret)
          +                             return ret;
          +             }
          +         
          +             return 0;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * This function actually applies the mapping to the page table of the
+ ++++++++++++++++++ * dma_ops domain.
+ ++++++++++++++++++ */
          +         static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
          +                                  struct unity_map_entry *e)
          +         {
          +             u64 addr;
          +             int ret;
          +         
          +             for (addr = e->address_start; addr < e->address_end;
          +                  addr += PAGE_SIZE) {
          +                     ret = iommu_map(&dma_dom->domain, addr, addr, e->prot);
          +                     if (ret)
          +                             return ret;
          +                     /*
          +                      * if unity mapping is in aperture range mark the page
          +                      * as allocated in the aperture
          +                      */
          +                     if (addr < dma_dom->aperture_size)
          +                             __set_bit(addr >> PAGE_SHIFT, dma_dom->bitmap);
          +             }
          +         
          +             return 0;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Inits the unity mappings required for a specific device
+ ++++++++++++++++++ */
          +         static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
          +                                               u16 devid)
          +         {
          +             struct unity_map_entry *e;
          +             int ret;
          +         
          +             list_for_each_entry(e, &amd_iommu_unity_map, list) {
          +                     if (!(devid >= e->devid_start && devid <= e->devid_end))
          +                             continue;
          +                     ret = dma_ops_unity_map(dma_dom, e);
          +                     if (ret)
          +                             return ret;
          +             }
          +         
          +             return 0;
          +         }
          +         
+ ++++++++++++++++++/****************************************************************************
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * The next functions belong to the address allocator for the dma_ops
+ ++++++++++++++++++ * interface functions. They work like the allocators in the other IOMMU
+ ++++++++++++++++++ * drivers. It's basically a bitmap which marks the allocated pages in
+ ++++++++++++++++++ * the aperture. Maybe it could be enhanced in the future to a more
+ ++++++++++++++++++ * efficient allocator.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ ****************************************************************************/
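+ ++++++++++++++++++/* e.g. a 32-bit dma_mask (0xffffffff) corresponds to 0x100000 4kb pages */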
          +         static unsigned long dma_mask_to_pages(unsigned long mask)
          +         {
          +             return (mask >> PAGE_SHIFT) +
          +                     (PAGE_ALIGN(mask & ~PAGE_MASK) >> PAGE_SHIFT);
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * The address allocator core function.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * called with domain->lock held
+ ++++++++++++++++++ */
          +         static unsigned long dma_ops_alloc_addresses(struct device *dev,
          +                                                  struct dma_ops_domain *dom,
          +                                                  unsigned int pages)
          +         {
          +             unsigned long limit = dma_mask_to_pages(*dev->dma_mask);
          +             unsigned long address;
          +             unsigned long size = dom->aperture_size >> PAGE_SHIFT;
          +             unsigned long boundary_size;
          +         
          +             boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
          +                             PAGE_SIZE) >> PAGE_SHIFT;
          +             limit = limit < size ? limit : size;
          +         
          +             if (dom->next_bit >= limit)
          +                     dom->next_bit = 0;
          +         
          +             address = iommu_area_alloc(dom->bitmap, limit, dom->next_bit, pages,
          +                             0 , boundary_size, 0);
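           +             /* if the fit starting at next_bit failed, wrap around and retry from 0 */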
          +             if (address == -1)
          +                     address = iommu_area_alloc(dom->bitmap, limit, 0, pages,
          +                                     0, boundary_size, 0);
          +         
          +             if (likely(address != -1)) {
          +                     dom->next_bit = address + pages;
          +                     address <<= PAGE_SHIFT;
          +             } else
          +                     address = bad_dma_address;
          +         
          +             WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size);
          +         
          +             return address;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * The address free function.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * called with domain->lock held
+ ++++++++++++++++++ */
          +         static void dma_ops_free_addresses(struct dma_ops_domain *dom,
          +                                        unsigned long address,
          +                                        unsigned int pages)
          +         {
          +             address >>= PAGE_SHIFT;
          +             iommu_area_free(dom->bitmap, address, pages);
          +         }
          +         
+ ++++++++++++++++++/****************************************************************************
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * The next functions belong to the domain allocation. A domain is
+ ++++++++++++++++++ * allocated for every IOMMU as the default domain. If device isolation
+ ++++++++++++++++++ * is enabled, every device gets its own domain. The most important thing
+ ++++++++++++++++++ * about domains is the page table mapping the DMA address space they
+ ++++++++++++++++++ * contain.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ ****************************************************************************/
+ ++++++++++++++++++
          +         static u16 domain_id_alloc(void)
          +         {
          +             unsigned long flags;
          +             int id;
          +         
          +             write_lock_irqsave(&amd_iommu_devtable_lock, flags);
          +             id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID);
          +             BUG_ON(id == 0);
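           +             /* id 0 is reserved and doubles as the error value below */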
          +             if (id > 0 && id < MAX_DOMAIN_ID)
          +                     __set_bit(id, amd_iommu_pd_alloc_bitmap);
          +             else
          +                     id = 0;
          +             write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
          +         
          +             return id;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Used to reserve address ranges in the aperture (e.g. for exclusion
+ ++++++++++++++++++ * ranges).
+ ++++++++++++++++++ */
          +         static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
          +                                           unsigned long start_page,
          +                                           unsigned int pages)
          +         {
          +             unsigned int last_page = dom->aperture_size >> PAGE_SHIFT;
          +         
          +             if (start_page + pages > last_page)
          +                     pages = last_page - start_page;
          +         
          +             set_bit_string(dom->bitmap, start_page, pages);
          +         }
          +         
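+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Free the three-level page table of a dma_ops domain
+ ++++++++++++++++++ */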
          +         static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom)
          +         {
          +             int i, j;
          +             u64 *p1, *p2, *p3;
          +         
          +             p1 = dma_dom->domain.pt_root;
          +         
          +             if (!p1)
          +                     return;
          +         
          +             for (i = 0; i < 512; ++i) {
          +                     if (!IOMMU_PTE_PRESENT(p1[i]))
          +                             continue;
          +         
          +                     p2 = IOMMU_PTE_PAGE(p1[i]);
           +                     for (j = 0; j < 512; ++j) {
          +                             if (!IOMMU_PTE_PRESENT(p2[j]))
          +                                     continue;
          +                             p3 = IOMMU_PTE_PAGE(p2[j]);
          +                             free_page((unsigned long)p3);
          +                     }
          +         
          +                     free_page((unsigned long)p2);
          +             }
          +         
          +             free_page((unsigned long)p1);
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Free a domain, only used if something went wrong in the
+ ++++++++++++++++++ * allocation path and we need to free an already allocated page table
+ ++++++++++++++++++ */
          +         static void dma_ops_domain_free(struct dma_ops_domain *dom)
          +         {
          +             if (!dom)
          +                     return;
          +         
          +             dma_ops_free_pagetable(dom);
          +         
          +             kfree(dom->pte_pages);
          +         
          +             kfree(dom->bitmap);
          +         
          +             kfree(dom);
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Allocates a new protection domain usable for the dma_ops functions.
+ ++++++++++++++++++ * It also initializes the page table and the address allocator data
+ ++++++++++++++++++ * structures required for the dma_ops interface.
+ ++++++++++++++++++ */
          +         static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
          +                                                        unsigned order)
          +         {
          +             struct dma_ops_domain *dma_dom;
          +             unsigned i, num_pte_pages;
          +             u64 *l2_pde;
          +             u64 address;
          +         
          +             /*
          +              * Currently the DMA aperture must be between 32 MB and 1GB in size
          +              */
          +             if ((order < 25) || (order > 30))
          +                     return NULL;
          +         
          +             dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL);
          +             if (!dma_dom)
          +                     return NULL;
          +         
          +             spin_lock_init(&dma_dom->domain.lock);
          +         
          +             dma_dom->domain.id = domain_id_alloc();
          +             if (dma_dom->domain.id == 0)
          +                     goto free_dma_dom;
          +             dma_dom->domain.mode = PAGE_MODE_3_LEVEL;
          +             dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
          +             dma_dom->domain.priv = dma_dom;
          +             if (!dma_dom->domain.pt_root)
          +                     goto free_dma_dom;
          +             dma_dom->aperture_size = (1ULL << order);
          +             dma_dom->bitmap = kzalloc(dma_dom->aperture_size / (PAGE_SIZE * 8),
          +                                       GFP_KERNEL);
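           +             /* the allocation bitmap holds one bit per page of the aperture */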
          +             if (!dma_dom->bitmap)
          +                     goto free_dma_dom;
          +             /*
           +              * mark the first page as allocated so we never return 0 as
           +              * a valid dma-address; this lets us use 0 as the error value
          +              */
          +             dma_dom->bitmap[0] = 1;
          +             dma_dom->next_bit = 0;
          +         
+ ++++++++++++++++++    /* Initialize the exclusion range if necessary */
          +             if (iommu->exclusion_start &&
          +                 iommu->exclusion_start < dma_dom->aperture_size) {
          +                     unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT;
          +                     int pages = to_pages(iommu->exclusion_start,
          +                                     iommu->exclusion_length);
          +                     dma_ops_reserve_addresses(dma_dom, startpage, pages);
          +             }
          +         
+ ++++++++++++++++++    /*
+ ++++++++++++++++++     * At the last step, build the page tables so we don't need to
+ ++++++++++++++++++     * allocate page table pages in the dma_ops mapping/unmapping
+ ++++++++++++++++++     * path.
+ ++++++++++++++++++     */
          +             num_pte_pages = dma_dom->aperture_size / (PAGE_SIZE * 512);
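           +             /* each pte page holds 512 entries and thus maps 2MB of the aperture */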
          +             dma_dom->pte_pages = kzalloc(num_pte_pages * sizeof(void *),
          +                             GFP_KERNEL);
          +             if (!dma_dom->pte_pages)
          +                     goto free_dma_dom;
          +         
          +             l2_pde = (u64 *)get_zeroed_page(GFP_KERNEL);
          +             if (l2_pde == NULL)
          +                     goto free_dma_dom;
          +         
          +             dma_dom->domain.pt_root[0] = IOMMU_L2_PDE(virt_to_phys(l2_pde));
          +         
          +             for (i = 0; i < num_pte_pages; ++i) {
          +                     dma_dom->pte_pages[i] = (u64 *)get_zeroed_page(GFP_KERNEL);
          +                     if (!dma_dom->pte_pages[i])
          +                             goto free_dma_dom;
          +                     address = virt_to_phys(dma_dom->pte_pages[i]);
          +                     l2_pde[i] = IOMMU_L1_PDE(address);
          +             }
          +         
          +             return dma_dom;
          +         
          +         free_dma_dom:
          +             dma_ops_domain_free(dma_dom);
          +         
          +             return NULL;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Find out the protection domain structure for a given PCI device. This
+ ++++++++++++++++++ * will give us the pointer to the page table root for example.
+ ++++++++++++++++++ */
          +         static struct protection_domain *domain_for_device(u16 devid)
          +         {
          +             struct protection_domain *dom;
          +             unsigned long flags;
          +         
          +             read_lock_irqsave(&amd_iommu_devtable_lock, flags);
          +             dom = amd_iommu_pd_table[devid];
          +             read_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
          +         
          +             return dom;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * If a device is not yet associated with a domain, this function assigns
+ ++++++++++++++++++ * it to the domain and makes that association visible to the hardware
+ ++++++++++++++++++ */
          +         static void set_device_domain(struct amd_iommu *iommu,
          +                                   struct protection_domain *domain,
          +                                   u16 devid)
          +         {
          +             unsigned long flags;
          +         
          +             u64 pte_root = virt_to_phys(domain->pt_root);
          +         
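           +             /* the paging mode is encoded in bits 9-11 of the device table entry */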
          +             pte_root |= (domain->mode & 0x07) << 9;
          +             pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | 2;
          +         
          +             write_lock_irqsave(&amd_iommu_devtable_lock, flags);
          +             amd_iommu_dev_table[devid].data[0] = pte_root;
          +             amd_iommu_dev_table[devid].data[1] = pte_root >> 32;
          +             amd_iommu_dev_table[devid].data[2] = domain->id;
          +         
          +             amd_iommu_pd_table[devid] = domain;
          +             write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
          +         
          +             iommu_queue_inv_dev_entry(iommu, devid);
          +         
          +             iommu->need_sync = 1;
          +         }
          +         
+ ++++++++++++++++++/*****************************************************************************
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * The next functions belong to the dma_ops mapping/unmapping code.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ *****************************************************************************/
+ ++++++++++++++++++
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * In the dma_ops path we only have the struct device. This function
+ ++++++++++++++++++ * finds the corresponding IOMMU, the protection domain and the
+ ++++++++++++++++++ * requestor id for a given device.
+ ++++++++++++++++++ * If the device is not yet associated with a domain this is also done
+ ++++++++++++++++++ * in this function.
+ ++++++++++++++++++ */
          +         static int get_device_resources(struct device *dev,
          +                                     struct amd_iommu **iommu,
          +                                     struct protection_domain **domain,
          +                                     u16 *bdf)
          +         {
          +             struct dma_ops_domain *dma_dom;
          +             struct pci_dev *pcidev;
          +             u16 _bdf;
          +         
          +             BUG_ON(!dev || dev->bus != &pci_bus_type || !dev->dma_mask);
          +         
          +             pcidev = to_pci_dev(dev);
- -------- ---------    _bdf = (pcidev->bus->number << 8) | pcidev->devfn;
+ ++++++++++++++++++    _bdf = calc_devid(pcidev->bus->number, pcidev->devfn);
          +         
+ ++++++++++++++++++    /* device not translated by any IOMMU in the system? */
          +             if (_bdf >= amd_iommu_last_bdf) {
          +                     *iommu = NULL;
          +                     *domain = NULL;
          +                     *bdf = 0xffff;
          +                     return 0;
          +             }
          +         
          +             *bdf = amd_iommu_alias_table[_bdf];
          +         
          +             *iommu = amd_iommu_rlookup_table[*bdf];
          +             if (*iommu == NULL)
          +                     return 0;
          +             dma_dom = (*iommu)->default_dom;
          +             *domain = domain_for_device(*bdf);
          +             if (*domain == NULL) {
          +                     *domain = &dma_dom->domain;
          +                     set_device_domain(*iommu, *domain, *bdf);
          +                     printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
          +                                     "device ", (*domain)->id);
          +                     print_devid(_bdf, 1);
          +             }
          +         
          +             return 1;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * This is the generic map function. It maps one 4kb page at paddr to
+ ++++++++++++++++++ * the given address in the DMA address space for the domain.
+ ++++++++++++++++++ */
          +         static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu,
          +                                          struct dma_ops_domain *dom,
          +                                          unsigned long address,
          +                                          phys_addr_t paddr,
          +                                          int direction)
          +         {
          +             u64 *pte, __pte;
          +         
          +             WARN_ON(address > dom->aperture_size);
          +         
          +             paddr &= PAGE_MASK;
          +         
          +             pte  = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)];
          +             pte += IOMMU_PTE_L0_INDEX(address);
          +         
          +             __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC;
          +         
          +             if (direction == DMA_TO_DEVICE)
          +                     __pte |= IOMMU_PTE_IR;
          +             else if (direction == DMA_FROM_DEVICE)
          +                     __pte |= IOMMU_PTE_IW;
          +             else if (direction == DMA_BIDIRECTIONAL)
          +                     __pte |= IOMMU_PTE_IR | IOMMU_PTE_IW;
          +         
          +             WARN_ON(*pte);
          +         
          +             *pte = __pte;
          +         
          +             return (dma_addr_t)address;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * The generic unmapping function for one page in the DMA address space.
+ ++++++++++++++++++ */
          +         static void dma_ops_domain_unmap(struct amd_iommu *iommu,
          +                                      struct dma_ops_domain *dom,
          +                                      unsigned long address)
          +         {
          +             u64 *pte;
          +         
          +             if (address >= dom->aperture_size)
          +                     return;
          +         
          +             WARN_ON(address & 0xfffULL || address > dom->aperture_size);
          +         
          +             pte  = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)];
          +             pte += IOMMU_PTE_L0_INDEX(address);
          +         
          +             WARN_ON(!*pte);
          +         
          +             *pte = 0ULL;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * This function contains common code for mapping a physically
+ ++++++++++++++++++ * contiguous memory region into DMA address space. It is used by all
+ ++++++++++++++++++ * mapping functions provided by this IOMMU driver.
+ ++++++++++++++++++ * Must be called with the domain lock held.
+ ++++++++++++++++++ */
          +         static dma_addr_t __map_single(struct device *dev,
          +                                    struct amd_iommu *iommu,
          +                                    struct dma_ops_domain *dma_dom,
          +                                    phys_addr_t paddr,
          +                                    size_t size,
          +                                    int dir)
          +         {
          +             dma_addr_t offset = paddr & ~PAGE_MASK;
          +             dma_addr_t address, start;
          +             unsigned int pages;
          +             int i;
          +         
          +             pages = to_pages(paddr, size);
          +             paddr &= PAGE_MASK;
          +         
          +             address = dma_ops_alloc_addresses(dev, dma_dom, pages);
          +             if (unlikely(address == bad_dma_address))
          +                     goto out;
          +         
          +             start = address;
          +             for (i = 0; i < pages; ++i) {
          +                     dma_ops_domain_map(iommu, dma_dom, start, paddr, dir);
          +                     paddr += PAGE_SIZE;
          +                     start += PAGE_SIZE;
          +             }
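           +             /* re-apply the offset into the first page */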
          +             address += offset;
          +         
          +         out:
          +             return address;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Does the reverse of the __map_single function. Must be called with
+ ++++++++++++++++++ * the domain lock held too
+ ++++++++++++++++++ */
          +         static void __unmap_single(struct amd_iommu *iommu,
          +                                struct dma_ops_domain *dma_dom,
          +                                dma_addr_t dma_addr,
          +                                size_t size,
          +                                int dir)
          +         {
          +             dma_addr_t i, start;
          +             unsigned int pages;
          +         
          +             if ((dma_addr == 0) || (dma_addr + size > dma_dom->aperture_size))
          +                     return;
          +         
          +             pages = to_pages(dma_addr, size);
          +             dma_addr &= PAGE_MASK;
          +             start = dma_addr;
          +         
          +             for (i = 0; i < pages; ++i) {
          +                     dma_ops_domain_unmap(iommu, dma_dom, start);
          +                     start += PAGE_SIZE;
          +             }
          +         
          +             dma_ops_free_addresses(dma_dom, dma_addr, pages);
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * The exported map_single function for dma_ops.
+ ++++++++++++++++++ */
          +         static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
          +                                  size_t size, int dir)
          +         {
          +             unsigned long flags;
          +             struct amd_iommu *iommu;
          +             struct protection_domain *domain;
          +             u16 devid;
          +             dma_addr_t addr;
          +         
          +             get_device_resources(dev, &iommu, &domain, &devid);
          +         
          +             if (iommu == NULL || domain == NULL)
+ ++++++++++++++++++            /* device not handled by any AMD IOMMU */
          +                     return (dma_addr_t)paddr;
          +         
          +             spin_lock_irqsave(&domain->lock, flags);
          +             addr = __map_single(dev, iommu, domain->priv, paddr, size, dir);
          +             if (addr == bad_dma_address)
          +                     goto out;
          +         
          +             if (iommu_has_npcache(iommu))
          +                     iommu_flush_pages(iommu, domain->id, addr, size);
          +         
          +             if (iommu->need_sync)
          +                     iommu_completion_wait(iommu);
          +         
          +         out:
          +             spin_unlock_irqrestore(&domain->lock, flags);
          +         
          +             return addr;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * The exported unmap_single function for dma_ops.
+ ++++++++++++++++++ */
          +         static void unmap_single(struct device *dev, dma_addr_t dma_addr,
          +                              size_t size, int dir)
          +         {
          +             unsigned long flags;
          +             struct amd_iommu *iommu;
          +             struct protection_domain *domain;
          +             u16 devid;
          +         
          +             if (!get_device_resources(dev, &iommu, &domain, &devid))
+ ++++++++++++++++++            /* device not handled by any AMD IOMMU */
          +                     return;
          +         
          +             spin_lock_irqsave(&domain->lock, flags);
          +         
          +             __unmap_single(iommu, domain->priv, dma_addr, size, dir);
          +         
          +             iommu_flush_pages(iommu, domain->id, dma_addr, size);
          +         
          +             if (iommu->need_sync)
          +                     iommu_completion_wait(iommu);
          +         
          +             spin_unlock_irqrestore(&domain->lock, flags);
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * This is a special map_sg function which is used if we have to map a
+ ++++++++++++++++++ * device that is not handled by an AMD IOMMU in the system.
+ ++++++++++++++++++ */
          +         static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist,
          +                                int nelems, int dir)
          +         {
          +             struct scatterlist *s;
          +             int i;
          +         
          +             for_each_sg(sglist, s, nelems, i) {
          +                     s->dma_address = (dma_addr_t)sg_phys(s);
          +                     s->dma_length  = s->length;
          +             }
          +         
          +             return nelems;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * The exported map_sg function for dma_ops (handles scatter-gather
+ ++++++++++++++++++ * lists).
+ ++++++++++++++++++ */
          +         static int map_sg(struct device *dev, struct scatterlist *sglist,
          +                       int nelems, int dir)
          +         {
          +             unsigned long flags;
          +             struct amd_iommu *iommu;
          +             struct protection_domain *domain;
          +             u16 devid;
          +             int i;
          +             struct scatterlist *s;
          +             phys_addr_t paddr;
          +             int mapped_elems = 0;
          +         
          +             get_device_resources(dev, &iommu, &domain, &devid);
          +         
          +             if (!iommu || !domain)
          +                     return map_sg_no_iommu(dev, sglist, nelems, dir);
          +         
          +             spin_lock_irqsave(&domain->lock, flags);
          +         
          +             for_each_sg(sglist, s, nelems, i) {
          +                     paddr = sg_phys(s);
          +         
          +                     s->dma_address = __map_single(dev, iommu, domain->priv,
          +                                                   paddr, s->length, dir);
          +         
          +                     if (s->dma_address) {
          +                             s->dma_length = s->length;
          +                             mapped_elems++;
          +                     } else
          +                             goto unmap;
          +                     if (iommu_has_npcache(iommu))
          +                             iommu_flush_pages(iommu, domain->id, s->dma_address,
          +                                               s->dma_length);
          +             }
          +         
          +             if (iommu->need_sync)
          +                     iommu_completion_wait(iommu);
          +         
          +         out:
          +             spin_unlock_irqrestore(&domain->lock, flags);
          +         
          +             return mapped_elems;
          +         unmap:
          +             for_each_sg(sglist, s, mapped_elems, i) {
          +                     if (s->dma_address)
          +                             __unmap_single(iommu, domain->priv, s->dma_address,
          +                                            s->dma_length, dir);
          +                     s->dma_address = s->dma_length = 0;
          +             }
          +         
          +             mapped_elems = 0;
          +         
          +             goto out;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * The exported unmap_sg function for dma_ops (handles scatter-gather
+ ++++++++++++++++++ * lists).
+ ++++++++++++++++++ */
          +         static void unmap_sg(struct device *dev, struct scatterlist *sglist,
          +                          int nelems, int dir)
          +         {
          +             unsigned long flags;
          +             struct amd_iommu *iommu;
          +             struct protection_domain *domain;
          +             struct scatterlist *s;
          +             u16 devid;
          +             int i;
          +         
          +             if (!get_device_resources(dev, &iommu, &domain, &devid))
          +                     return;
          +         
          +             spin_lock_irqsave(&domain->lock, flags);
          +         
          +             for_each_sg(sglist, s, nelems, i) {
          +                     __unmap_single(iommu, domain->priv, s->dma_address,
          +                                    s->dma_length, dir);
          +                     iommu_flush_pages(iommu, domain->id, s->dma_address,
          +                                       s->dma_length);
          +                     s->dma_address = s->dma_length = 0;
          +             }
          +         
          +             if (iommu->need_sync)
          +                     iommu_completion_wait(iommu);
          +         
          +             spin_unlock_irqrestore(&domain->lock, flags);
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * The exported alloc_coherent function for dma_ops.
+ ++++++++++++++++++ */
          +         static void *alloc_coherent(struct device *dev, size_t size,
          +                                 dma_addr_t *dma_addr, gfp_t flag)
          +         {
          +             unsigned long flags;
          +             void *virt_addr;
          +             struct amd_iommu *iommu;
          +             struct protection_domain *domain;
          +             u16 devid;
          +             phys_addr_t paddr;
          +         
          +             virt_addr = (void *)__get_free_pages(flag, get_order(size));
          +             if (!virt_addr)
          +                     return 0;
          +         
          +             memset(virt_addr, 0, size);
          +             paddr = virt_to_phys(virt_addr);
          +         
          +             get_device_resources(dev, &iommu, &domain, &devid);
          +         
          +             if (!iommu || !domain) {
          +                     *dma_addr = (dma_addr_t)paddr;
          +                     return virt_addr;
          +             }
          +         
          +             spin_lock_irqsave(&domain->lock, flags);
          +         
          +             *dma_addr = __map_single(dev, iommu, domain->priv, paddr,
          +                                      size, DMA_BIDIRECTIONAL);
          +         
          +             if (*dma_addr == bad_dma_address) {
          +                     free_pages((unsigned long)virt_addr, get_order(size));
          +                     virt_addr = NULL;
          +                     goto out;
          +             }
          +         
          +             if (iommu_has_npcache(iommu))
          +                     iommu_flush_pages(iommu, domain->id, *dma_addr, size);
          +         
          +             if (iommu->need_sync)
          +                     iommu_completion_wait(iommu);
          +         
          +         out:
          +             spin_unlock_irqrestore(&domain->lock, flags);
          +         
          +             return virt_addr;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * The exported free_coherent function for dma_ops.
+ ++++++++++++++++++ * FIXME: fix the generic x86 DMA layer so that it actually calls that
+ ++++++++++++++++++ *        function.
+ ++++++++++++++++++ */
          +         static void free_coherent(struct device *dev, size_t size,
          +                               void *virt_addr, dma_addr_t dma_addr)
          +         {
          +             unsigned long flags;
          +             struct amd_iommu *iommu;
          +             struct protection_domain *domain;
          +             u16 devid;
          +         
          +             get_device_resources(dev, &iommu, &domain, &devid);
          +         
          +             if (!iommu || !domain)
          +                     goto free_mem;
          +         
          +             spin_lock_irqsave(&domain->lock, flags);
          +         
          +             __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
          +             iommu_flush_pages(iommu, domain->id, dma_addr, size);
          +         
          +             if (iommu->need_sync)
          +                     iommu_completion_wait(iommu);
          +         
          +             spin_unlock_irqrestore(&domain->lock, flags);
          +         
          +         free_mem:
          +             free_pages((unsigned long)virt_addr, get_order(size));
          +         }
          +         
          +         /*
+ ++++++++++++++++++ * The function for pre-allocating protection domains.
+ ++++++++++++++++++ *
           +          * If the driver core informed the DMA layer whenever a driver grabs a
           +          * device, we would not need to preallocate the protection domains
           +          * anymore. For now we have to.
          +          */
          +         void prealloc_protection_domains(void)
          +         {
          +             struct pci_dev *dev = NULL;
          +             struct dma_ops_domain *dma_dom;
          +             struct amd_iommu *iommu;
          +             int order = amd_iommu_aperture_order;
          +             u16 devid;
          +         
          +             while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
          +                     devid = (dev->bus->number << 8) | dev->devfn;
          +                     if (devid >= amd_iommu_last_bdf)
          +                             continue;
          +                     devid = amd_iommu_alias_table[devid];
          +                     if (domain_for_device(devid))
          +                             continue;
          +                     iommu = amd_iommu_rlookup_table[devid];
          +                     if (!iommu)
          +                             continue;
          +                     dma_dom = dma_ops_domain_alloc(iommu, order);
          +                     if (!dma_dom)
          +                             continue;
          +                     init_unity_mappings_for_device(dma_dom, devid);
          +                     set_device_domain(iommu, &dma_dom->domain, devid);
          +                     printk(KERN_INFO "AMD IOMMU: Allocated domain %d for device ",
          +                            dma_dom->domain.id);
          +                     print_devid(devid, 1);
          +             }
          +         }
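
The device id used to index the driver's tables throughout this file packs the
PCI bus number into the upper byte and devfn into the lower byte, exactly as in
the loop above. A standalone sketch of that arithmetic (the sample values are
made up):

        #include <stdint.h>
        #include <stdio.h>

        int main(void)
        {
                uint8_t bus = 0x05, devfn = (3 << 3) | 1;   /* device 3, function 1 */
                uint16_t devid = (bus << 8) | devfn;        /* same packing as above */

                /* decomposition mirroring PCI_BUS()/PCI_SLOT()/PCI_FUNC() */
                printf("devid=0x%04x bus=0x%02x slot=%u fn=%u\n",
                       devid, (devid >> 8) & 0xff, (devfn >> 3) & 0x1f, devfn & 0x07);
                return 0;
        }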
          +         
          +         static struct dma_mapping_ops amd_iommu_dma_ops = {
          +             .alloc_coherent = alloc_coherent,
          +             .free_coherent = free_coherent,
          +             .map_single = map_single,
          +             .unmap_single = unmap_single,
          +             .map_sg = map_sg,
          +             .unmap_sg = unmap_sg,
          +         };
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * The function which glues the AMD IOMMU driver into dma_ops.
+ ++++++++++++++++++ */
          +         int __init amd_iommu_init_dma_ops(void)
          +         {
          +             struct amd_iommu *iommu;
          +             int order = amd_iommu_aperture_order;
          +             int ret;
          +         
+ ++++++++++++++++++    /*
+ ++++++++++++++++++     * First allocate a default protection domain for every IOMMU we
+ ++++++++++++++++++     * find in the system. Devices not assigned to any other
+ ++++++++++++++++++     * protection domain will be assigned to the default one.
+ ++++++++++++++++++     */
          +             list_for_each_entry(iommu, &amd_iommu_list, list) {
          +                     iommu->default_dom = dma_ops_domain_alloc(iommu, order);
          +                     if (iommu->default_dom == NULL)
          +                             return -ENOMEM;
          +                     ret = iommu_init_unity_mappings(iommu);
          +                     if (ret)
          +                             goto free_domains;
          +             }
          +         
+ ++++++++++++++++++    /*
+ ++++++++++++++++++     * If device isolation is enabled, pre-allocate the protection
+ ++++++++++++++++++     * domains for each device.
+ ++++++++++++++++++     */
          +             if (amd_iommu_isolate)
          +                     prealloc_protection_domains();
          +         
          +             iommu_detected = 1;
          +             force_iommu = 1;
          +             bad_dma_address = 0;
          +         #ifdef CONFIG_GART_IOMMU
          +             gart_iommu_aperture_disabled = 1;
          +             gart_iommu_aperture = 0;
          +         #endif
          +         
+ ++++++++++++++++++    /* Finally make the driver visible to the DMA layer */
          +             dma_ops = &amd_iommu_dma_ops;
          +         
          +             return 0;
          +         
          +         free_domains:
          +         
          +             list_for_each_entry(iommu, &amd_iommu_list, list) {
          +                     if (iommu->default_dom)
          +                             dma_ops_domain_free(iommu->default_dom);
          +             }
          +         
          +             return ret;
          +         }
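
Assigning the global dma_ops pointer is how the backend above becomes reachable
from the generic DMA layer: it is plain function-pointer dispatch through a
struct. A standalone model of that pattern (all names are illustrative, not the
kernel's):

        #include <stdio.h>
        #include <stddef.h>

        struct demo_dma_ops {
                void *(*alloc_coherent)(size_t size);
        };

        static void *demo_alloc(size_t size)
        {
                printf("allocating %zu bytes\n", size);
                return NULL;                    /* placeholder backend */
        }

        static struct demo_dma_ops demo_ops = { .alloc_coherent = demo_alloc };
        static struct demo_dma_ops *dma_ops;    /* the switchable backend pointer */

        int main(void)
        {
                dma_ops = &demo_ops;            /* analogous to dma_ops = &amd_iommu_dma_ops */
                dma_ops->alloc_coherent(4096);  /* a generic layer would dispatch like this */
                return 0;
        }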
index 2a13e43,7661b02,2a13e43,2a13e43,2a13e43,2a13e43,2a13e43,6643828,2a13e43,2a13e43,0000000,2a13e43,2a13e43,2a13e43,2a13e43,2a13e43,2a13e43,2a13e43,2a13e43,2a13e43..c9d8ff2
mode 100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,000000,100644,100644,100644,100644,100644,100644,100644,100644,100644..100644
--- /dev/null
          +         /*
          +          * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
          +          * Author: Joerg Roedel <joerg.roedel@amd.com>
          +          *         Leo Duran <leo.duran@amd.com>
          +          *
          +          * This program is free software; you can redistribute it and/or modify it
          +          * under the terms of the GNU General Public License version 2 as published
          +          * by the Free Software Foundation.
          +          *
          +          * This program is distributed in the hope that it will be useful,
          +          * but WITHOUT ANY WARRANTY; without even the implied warranty of
          +          * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
          +          * GNU General Public License for more details.
          +          *
          +          * You should have received a copy of the GNU General Public License
          +          * along with this program; if not, write to the Free Software
          +          * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
          +          */
          +         
          +         #include <linux/pci.h>
          +         #include <linux/acpi.h>
          +         #include <linux/gfp.h>
          +         #include <linux/list.h>
          +         #include <linux/sysdev.h>
          +         #include <asm/pci-direct.h>
          +         #include <asm/amd_iommu_types.h>
          +         #include <asm/amd_iommu.h>
------- -- ---------#include <asm/gart.h>
+++++++ ++++++++++++#include <asm/iommu.h>
          +         
          +         /*
          +          * definitions for the ACPI scanning code
          +          */
- -------- ---------#define UPDATE_LAST_BDF(x) do {\
- -------- ---------    if ((x) > amd_iommu_last_bdf) \
- -------- ---------            amd_iommu_last_bdf = (x); \
- -------- ---------    } while (0);
- -------- ---------
- -------- ---------#define DEVID(bus, devfn) (((bus) << 8) | (devfn))
          +         #define PCI_BUS(x) (((x) >> 8) & 0xff)
          +         #define IVRS_HEADER_LENGTH 48
- -------- ---------#define TBL_SIZE(x) (1 << (PAGE_SHIFT + get_order(amd_iommu_last_bdf * (x))))
          +         
          +         #define ACPI_IVHD_TYPE                  0x10
          +         #define ACPI_IVMD_TYPE_ALL              0x20
          +         #define ACPI_IVMD_TYPE                  0x21
          +         #define ACPI_IVMD_TYPE_RANGE            0x22
          +         
          +         #define IVHD_DEV_ALL                    0x01
          +         #define IVHD_DEV_SELECT                 0x02
          +         #define IVHD_DEV_SELECT_RANGE_START     0x03
          +         #define IVHD_DEV_RANGE_END              0x04
          +         #define IVHD_DEV_ALIAS                  0x42
          +         #define IVHD_DEV_ALIAS_RANGE            0x43
          +         #define IVHD_DEV_EXT_SELECT             0x46
          +         #define IVHD_DEV_EXT_SELECT_RANGE       0x47
          +         
           +         #define IVHD_FLAG_HT_TUN_EN             0x01
           +         #define IVHD_FLAG_PASSPW_EN             0x02
           +         #define IVHD_FLAG_RESPASSPW_EN          0x04
           +         #define IVHD_FLAG_ISOC_EN               0x08
          +         
          +         #define IVMD_FLAG_EXCL_RANGE            0x08
          +         #define IVMD_FLAG_UNITY_MAP             0x01
          +         
          +         #define ACPI_DEVFLAG_INITPASS           0x01
          +         #define ACPI_DEVFLAG_EXTINT             0x02
          +         #define ACPI_DEVFLAG_NMI                0x04
          +         #define ACPI_DEVFLAG_SYSMGT1            0x10
          +         #define ACPI_DEVFLAG_SYSMGT2            0x20
          +         #define ACPI_DEVFLAG_LINT0              0x40
          +         #define ACPI_DEVFLAG_LINT1              0x80
          +         #define ACPI_DEVFLAG_ATSDIS             0x10000000
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * ACPI table definitions
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * These data structures are laid over the table to parse the important values
+ ++++++++++++++++++ * out of it.
+ ++++++++++++++++++ */
+ ++++++++++++++++++
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Structure describing one IOMMU in the ACPI table. Typically followed by one
+ ++++++++++++++++++ * or more ivhd_entries.
+ ++++++++++++++++++ */
          +         struct ivhd_header {
          +             u8 type;
          +             u8 flags;
          +             u16 length;
          +             u16 devid;
          +             u16 cap_ptr;
          +             u64 mmio_phys;
          +             u16 pci_seg;
          +             u16 info;
          +             u32 reserved;
          +         } __attribute__((packed));
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * A device entry describing which devices a specific IOMMU translates and
+ ++++++++++++++++++ * which requestor ids they use.
+ ++++++++++++++++++ */
          +         struct ivhd_entry {
          +             u8 type;
          +             u16 devid;
          +             u8 flags;
          +             u32 ext;
          +         } __attribute__((packed));
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * An AMD IOMMU memory definition structure. It defines things like exclusion
+ ++++++++++++++++++ * ranges for devices and regions that should be unity mapped.
+ ++++++++++++++++++ */
          +         struct ivmd_header {
          +             u8 type;
          +             u8 flags;
          +             u16 length;
          +             u16 devid;
          +             u16 aux;
          +             u64 resv;
          +             u64 range_start;
          +             u64 range_length;
          +         } __attribute__((packed));
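
The parsers below never copy the ACPI table; they walk the raw bytes and lay
the packed structures above over them. A standalone sketch of that technique,
using a made-up two-field header:

        #include <stdint.h>
        #include <stdio.h>

        struct demo_header {
                uint8_t  type;
                uint16_t length;
        } __attribute__((packed));

        int main(void)
        {
                /* pretend this buffer came straight out of an ACPI table */
                uint8_t raw[] = { 0x10, 0x30, 0x00 };   /* type 0x10, length 0x0030 (LE) */
                struct demo_header *h = (struct demo_header *)raw;

                printf("type=0x%02x length=%u\n", h->type, (unsigned)h->length);
                return 0;
        }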
          +         
          +         static int __initdata amd_iommu_detected;
          +         
- -------- ---------u16 amd_iommu_last_bdf;
- -------- ---------struct list_head amd_iommu_unity_map;
- -------- ---------unsigned amd_iommu_aperture_order = 26;
- -------- ---------int amd_iommu_isolate;
+ ++++++++++++++++++u16 amd_iommu_last_bdf;                     /* largest PCI device id we have
+ ++++++++++++++++++                                       to handle */
+ ++++++++++++++++++LIST_HEAD(amd_iommu_unity_map);             /* a list of required unity mappings
+ ++++++++++++++++++                                       we find in ACPI */
+ ++++++++++++++++++unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */
+ ++++++++++++++++++int amd_iommu_isolate;                      /* if 1, device isolation is enabled */
+ ++++++++++++++++++
+ ++++++++++++++++++LIST_HEAD(amd_iommu_list);          /* list of all AMD IOMMUs in the
+ ++++++++++++++++++                                       system */
          +         
- -------- ---------struct list_head amd_iommu_list;
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Pointer to the device table which is shared by all AMD IOMMUs. It is
+ ++++++++++++++++++ * indexed by the PCI device id or the HT unit id and contains
+ ++++++++++++++++++ * information about the domain the device belongs to as well as the
+ ++++++++++++++++++ * page table root pointer.
+ ++++++++++++++++++ */
          +         struct dev_table_entry *amd_iommu_dev_table;
+ ++++++++++++++++++
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * The alias table is a driver specific data structure which contains the
+ ++++++++++++++++++ * mappings of the PCI device ids to the actual requestor ids on the IOMMU.
+ ++++++++++++++++++ * More than one device can share the same requestor id.
+ ++++++++++++++++++ */
          +         u16 *amd_iommu_alias_table;
+ ++++++++++++++++++
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * The rlookup table is used to find the IOMMU which is responsible
+ ++++++++++++++++++ * for a specific device. It is also indexed by the PCI device id.
+ ++++++++++++++++++ */
          +         struct amd_iommu **amd_iommu_rlookup_table;
+ ++++++++++++++++++
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * The pd table (protection domain table) is used to find the protection domain
+ ++++++++++++++++++ * data structure a device belongs to. Indexed with the PCI device id too.
+ ++++++++++++++++++ */
          +         struct protection_domain **amd_iommu_pd_table;
+ ++++++++++++++++++
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap
+ ++++++++++++++++++ * to know which ones are already in use.
+ ++++++++++++++++++ */
          +         unsigned long *amd_iommu_pd_alloc_bitmap;
          +         
- -------- ---------static u32 dev_table_size;
- -------- ---------static u32 alias_table_size;
- -------- ---------static u32 rlookup_table_size;
+ ++++++++++++++++++static u32 dev_table_size;  /* size of the device table */
+ ++++++++++++++++++static u32 alias_table_size;        /* size of the alias table */
+ ++++++++++++++++++static u32 rlookup_table_size;      /* size of the rlookup table */
          +         
+ ++++++++++++++++++static inline void update_last_devid(u16 devid)
+ ++++++++++++++++++{
+ ++++++++++++++++++    if (devid > amd_iommu_last_bdf)
+ ++++++++++++++++++            amd_iommu_last_bdf = devid;
+ ++++++++++++++++++}
+ ++++++++++++++++++
+ ++++++++++++++++++static inline unsigned long tbl_size(int entry_size)
+ ++++++++++++++++++{
+ ++++++++++++++++++    unsigned shift = PAGE_SHIFT +
+ ++++++++++++++++++                     get_order(amd_iommu_last_bdf * entry_size);
+ ++++++++++++++++++
+ ++++++++++++++++++    return 1UL << shift;
+ ++++++++++++++++++}
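
tbl_size() rounds a table of amd_iommu_last_bdf entries up to a power-of-two
number of pages. A standalone model of that computation, with a local stand-in
for the kernel's get_order() and 4 KB pages assumed:

        #include <stdio.h>

        #define DEMO_PAGE_SHIFT 12

        /* stand-in for get_order(): smallest order such that (1 << order) pages hold size */
        static int demo_get_order(unsigned long size)
        {
                unsigned long pages = (size + (1UL << DEMO_PAGE_SHIFT) - 1) >> DEMO_PAGE_SHIFT;
                int order = 0;

                while ((1UL << order) < pages)
                        order++;
                return order;
        }

        int main(void)
        {
                unsigned long last_bdf = 0xffff;        /* worst case: full 16-bit device id space */
                int entry_size = 32;                    /* e.g. one 32-byte device table entry */
                unsigned long size = 1UL << (DEMO_PAGE_SHIFT +
                                             demo_get_order(last_bdf * entry_size));

                printf("table size: %lu bytes\n", size);        /* 2097152 (2 MB) */
                return 0;
        }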
+ ++++++++++++++++++
+ ++++++++++++++++++/****************************************************************************
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * AMD IOMMU MMIO register space handling functions
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * These functions are used to program the IOMMU device registers in
+ ++++++++++++++++++ * MMIO space required for that driver.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ ****************************************************************************/
+ ++++++++++++++++++
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * This function sets the exclusion range in the IOMMU. DMA accesses to the
+ ++++++++++++++++++ * exclusion range are passed through untranslated.
+ ++++++++++++++++++ */
          +         static void __init iommu_set_exclusion_range(struct amd_iommu *iommu)
          +         {
          +             u64 start = iommu->exclusion_start & PAGE_MASK;
          +             u64 limit = (start + iommu->exclusion_length) & PAGE_MASK;
          +             u64 entry;
          +         
          +             if (!iommu->exclusion_start)
          +                     return;
          +         
          +             entry = start | MMIO_EXCL_ENABLE_MASK;
          +             memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET,
          +                             &entry, sizeof(entry));
          +         
          +             entry = limit;
          +             memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET,
          +                             &entry, sizeof(entry));
          +         }
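
The exclusion base and limit are page-aligned by masking off the low address
bits, and the enable flag is OR'ed into the otherwise-unused low bits of the
base register. A standalone sketch of that bit arithmetic (mask and flag values
are illustrative, 4 KB pages assumed):

        #include <stdint.h>
        #include <stdio.h>

        #define DEMO_PAGE_MASK          (~0xfffULL)     /* 4 KB pages */
        #define DEMO_EXCL_ENABLE        0x01ULL         /* illustrative enable bit */

        int main(void)
        {
                uint64_t excl_start = 0x12345678, excl_len = 0x20000;
                uint64_t start = excl_start & DEMO_PAGE_MASK;
                uint64_t limit = (start + excl_len) & DEMO_PAGE_MASK;

                printf("base  = 0x%llx\n", (unsigned long long)(start | DEMO_EXCL_ENABLE));
                printf("limit = 0x%llx\n", (unsigned long long)limit);
                return 0;
        }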
          +         
+ ++++++++++++++++++/* Programs the physical address of the device table into the IOMMU hardware */
          +         static void __init iommu_set_device_table(struct amd_iommu *iommu)
          +         {
          +             u32 entry;
          +         
          +             BUG_ON(iommu->mmio_base == NULL);
          +         
          +             entry = virt_to_phys(amd_iommu_dev_table);
          +             entry |= (dev_table_size >> 12) - 1;
          +             memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET,
          +                             &entry, sizeof(entry));
          +         }
          +         
+ ++++++++++++++++++/* Generic functions to enable/disable certain features of the IOMMU. */
          +         static void __init iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
          +         {
          +             u32 ctrl;
          +         
          +             ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET);
          +             ctrl |= (1 << bit);
          +             writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
          +         }
          +         
          +         static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
          +         {
          +             u32 ctrl;
          +         
          +             ctrl = (u64)readl(iommu->mmio_base + MMIO_CONTROL_OFFSET);
          +             ctrl &= ~(1 << bit);
          +             writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
          +         }
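
Both helpers are a read-modify-write of the 32-bit control register, setting or
clearing one bit. The same pattern in standalone form (the register is modelled
as a plain variable and the bit numbers are illustrative):

        #include <stdint.h>
        #include <stdio.h>

        static uint32_t demo_ctrl;      /* stands in for the MMIO control register */

        static void demo_feature_enable(uint8_t bit)  { demo_ctrl |=  (1u << bit); }
        static void demo_feature_disable(uint8_t bit) { demo_ctrl &= ~(1u << bit); }

        int main(void)
        {
                demo_feature_enable(0);
                demo_feature_enable(3);
                demo_feature_disable(3);
                printf("ctrl = 0x%08x\n", demo_ctrl);   /* prints 0x00000001 */
                return 0;
        }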
          +         
+ ++++++++++++++++++/* Function to enable the hardware */
          +         void __init iommu_enable(struct amd_iommu *iommu)
          +         {
          +             printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at ");
          +             print_devid(iommu->devid, 0);
          +             printk(" cap 0x%hx\n", iommu->cap_ptr);
          +         
          +             iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in
+ ++++++++++++++++++ * the system has its own MMIO region.
+ ++++++++++++++++++ */
          +         static u8 * __init iommu_map_mmio_space(u64 address)
          +         {
          +             u8 *ret;
          +         
          +             if (!request_mem_region(address, MMIO_REGION_LENGTH, "amd_iommu"))
          +                     return NULL;
          +         
          +             ret = ioremap_nocache(address, MMIO_REGION_LENGTH);
          +             if (ret != NULL)
          +                     return ret;
          +         
          +             release_mem_region(address, MMIO_REGION_LENGTH);
          +         
          +             return NULL;
          +         }
          +         
          +         static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu)
          +         {
          +             if (iommu->mmio_base)
          +                     iounmap(iommu->mmio_base);
          +             release_mem_region(iommu->mmio_phys, MMIO_REGION_LENGTH);
          +         }
          +         
+ ++++++++++++++++++/****************************************************************************
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * The functions below belong to the first pass of AMD IOMMU ACPI table
+ ++++++++++++++++++ * parsing. In this pass we try to find out the highest device id this
+ ++++++++++++++++++ * code has to handle. Based on this information the size of the shared data
+ ++++++++++++++++++ * structures is determined later.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ ****************************************************************************/
+ ++++++++++++++++++
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * This function reads the last device id the IOMMU has to handle from the PCI
+ ++++++++++++++++++ * capability header for this IOMMU
+ ++++++++++++++++++ */
          +         static int __init find_last_devid_on_pci(int bus, int dev, int fn, int cap_ptr)
          +         {
          +             u32 cap;
          +         
          +             cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET);
- -------- ---------    UPDATE_LAST_BDF(DEVID(MMIO_GET_BUS(cap), MMIO_GET_LD(cap)));
+ ++++++++++++++++++    update_last_devid(calc_devid(MMIO_GET_BUS(cap), MMIO_GET_LD(cap)));
          +         
          +             return 0;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * After reading the highest device id from the IOMMU PCI capability header
+ ++++++++++++++++++ * this function checks whether a higher device id is defined in the ACPI table
+ ++++++++++++++++++ */
          +         static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
          +         {
          +             u8 *p = (void *)h, *end = (void *)h;
          +             struct ivhd_entry *dev;
          +         
          +             p += sizeof(*h);
          +             end += h->length;
          +         
          +             find_last_devid_on_pci(PCI_BUS(h->devid),
          +                             PCI_SLOT(h->devid),
          +                             PCI_FUNC(h->devid),
          +                             h->cap_ptr);
          +         
          +             while (p < end) {
          +                     dev = (struct ivhd_entry *)p;
          +                     switch (dev->type) {
          +                     case IVHD_DEV_SELECT:
          +                     case IVHD_DEV_RANGE_END:
          +                     case IVHD_DEV_ALIAS:
          +                     case IVHD_DEV_EXT_SELECT:
- -------- ---------                    UPDATE_LAST_BDF(dev->devid);
+ ++++++++++++++++++                    /* all the above subfield types refer to device ids */
+ ++++++++++++++++++                    update_last_devid(dev->devid);
          +                             break;
          +                     default:
          +                             break;
          +                     }
          +                     p += 0x04 << (*p >> 6);
          +             }
          +         
          +             WARN_ON(p != end);
          +         
          +             return 0;
          +         }
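
The walk advances by "0x04 << (*p >> 6)": the top two bits of an entry's type
byte select a length of 4, 8, 16 or 32 bytes. A standalone check of that
decoding (the type values are samples):

        #include <stdint.h>
        #include <stdio.h>

        int main(void)
        {
                uint8_t types[] = { 0x02, 0x42, 0x80, 0xc0 };
                int i;

                for (i = 0; i < 4; i++)
                        printf("type 0x%02x -> entry length %d bytes\n",
                               types[i], 0x04 << (types[i] >> 6));
                return 0;
        }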
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Iterate over all IVHD entries in the ACPI table and find the highest device
+ ++++++++++++++++++ * id which we need to handle. This is the first of three functions which parse
+ ++++++++++++++++++ * the ACPI table. So we check the checksum here.
+ ++++++++++++++++++ */
          +         static int __init find_last_devid_acpi(struct acpi_table_header *table)
          +         {
          +             int i;
          +             u8 checksum = 0, *p = (u8 *)table, *end = (u8 *)table;
          +             struct ivhd_header *h;
          +         
          +             /*
          +              * Validate checksum here so we don't need to do it when
          +              * we actually parse the table
          +              */
          +             for (i = 0; i < table->length; ++i)
          +                     checksum += p[i];
          +             if (checksum != 0)
          +                     /* ACPI table corrupt */
          +                     return -ENODEV;
          +         
          +             p += IVRS_HEADER_LENGTH;
          +         
          +             end += table->length;
          +             while (p < end) {
          +                     h = (struct ivhd_header *)p;
          +                     switch (h->type) {
          +                     case ACPI_IVHD_TYPE:
          +                             find_last_devid_from_ivhd(h);
          +                             break;
          +                     default:
          +                             break;
          +                     }
          +                     p += h->length;
          +             }
          +             WARN_ON(p != end);
          +         
          +             return 0;
          +         }
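
The table is validated the same way all ACPI tables are: every byte of the
table, including the checksum byte itself, must sum to zero modulo 256. A
standalone sketch of that validation over a made-up buffer:

        #include <stdint.h>
        #include <stdio.h>
        #include <string.h>

        int main(void)
        {
                uint8_t table[16];
                uint8_t sum = 0;
                int i;

                memset(table, 0xab, sizeof(table));
                table[15] = 0;                          /* checksum byte */
                for (i = 0; i < 15; i++)
                        sum += table[i];
                table[15] = (uint8_t)(0x100 - sum);     /* fix up the checksum */

                for (sum = 0, i = 0; i < 16; i++)       /* the validation loop */
                        sum += table[i];
                printf("checksum %s\n", sum == 0 ? "ok" : "corrupt");
                return 0;
        }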
          +         
+ ++++++++++++++++++/****************************************************************************
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * The following functions belong to the code path which parses the ACPI table
+ ++++++++++++++++++ * the second time. In this ACPI parsing iteration we allocate IOMMU-specific
+ ++++++++++++++++++ * data structures, initialize the device/alias/rlookup table and also
+ ++++++++++++++++++ * basically initialize the hardware.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ ****************************************************************************/
+ ++++++++++++++++++
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Allocates the command buffer. This buffer is per AMD IOMMU. We can
+ ++++++++++++++++++ * write commands to that buffer later and the IOMMU will execute them
+ ++++++++++++++++++ * asynchronously
+ ++++++++++++++++++ */
          +         static u8 * __init alloc_command_buffer(struct amd_iommu *iommu)
          +         {
- -------- ---------    u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL,
+ ++++++++++++++++++    u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
          +                             get_order(CMD_BUFFER_SIZE));
- -------- ---------    u64 entry = 0;
+ ++++++++++++++++++    u64 entry;
          +         
          +             if (cmd_buf == NULL)
          +                     return NULL;
          +         
          +             iommu->cmd_buf_size = CMD_BUFFER_SIZE;
          +         
- -------- ---------    memset(cmd_buf, 0, CMD_BUFFER_SIZE);
- -------- ---------
          +             entry = (u64)virt_to_phys(cmd_buf);
          +             entry |= MMIO_CMD_SIZE_512;
          +             memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
          +                             &entry, sizeof(entry));
          +         
          +             iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
          +         
          +             return cmd_buf;
          +         }
          +         
          +         static void __init free_command_buffer(struct amd_iommu *iommu)
          +         {
- -------- ---------    if (iommu->cmd_buf)
- -------- ---------            free_pages((unsigned long)iommu->cmd_buf,
- -------- ---------                            get_order(CMD_BUFFER_SIZE));
+ ++++++++++++++++++    free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE));
          +         }
          +         
+ ++++++++++++++++++/* sets a specific bit in the device table entry. */
          +         static void set_dev_entry_bit(u16 devid, u8 bit)
          +         {
          +             int i = (bit >> 5) & 0x07;
          +             int _bit = bit & 0x1f;
          +         
          +             amd_iommu_dev_table[devid].data[i] |= (1 << _bit);
          +         }
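
set_dev_entry_bit() treats a device table entry as an array of 32-bit words:
"(bit >> 5) & 0x07" selects the word and "bit & 0x1f" the bit within it. A
standalone version of the same indexing (the bit number is an arbitrary
example):

        #include <stdint.h>
        #include <stdio.h>

        int main(void)
        {
                uint32_t data[8] = { 0 };       /* one device table entry, eight 32-bit words */
                uint8_t bit = 96;

                data[(bit >> 5) & 0x07] |= 1u << (bit & 0x1f);
                printf("word %d = 0x%08x\n", (bit >> 5) & 0x07, data[(bit >> 5) & 0x07]);
                return 0;
        }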
          +         
- -------- ---------static void __init set_dev_entry_from_acpi(u16 devid, u32 flags, u32 ext_flags)
+ ++++++++++++++++++/* Writes the specific IOMMU for a device into the rlookup table */
+ ++++++++++++++++++static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid)
+ ++++++++++++++++++{
+ ++++++++++++++++++    amd_iommu_rlookup_table[devid] = iommu;
+ ++++++++++++++++++}
+ ++++++++++++++++++
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * This function takes the device specific flags read from the ACPI
+ ++++++++++++++++++ * table and sets up the device table entry with that information
+ ++++++++++++++++++ */
+ ++++++++++++++++++static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu,
+ ++++++++++++++++++                                       u16 devid, u32 flags, u32 ext_flags)
          +         {
          +             if (flags & ACPI_DEVFLAG_INITPASS)
          +                     set_dev_entry_bit(devid, DEV_ENTRY_INIT_PASS);
          +             if (flags & ACPI_DEVFLAG_EXTINT)
          +                     set_dev_entry_bit(devid, DEV_ENTRY_EINT_PASS);
          +             if (flags & ACPI_DEVFLAG_NMI)
          +                     set_dev_entry_bit(devid, DEV_ENTRY_NMI_PASS);
          +             if (flags & ACPI_DEVFLAG_SYSMGT1)
          +                     set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1);
          +             if (flags & ACPI_DEVFLAG_SYSMGT2)
          +                     set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2);
          +             if (flags & ACPI_DEVFLAG_LINT0)
          +                     set_dev_entry_bit(devid, DEV_ENTRY_LINT0_PASS);
          +             if (flags & ACPI_DEVFLAG_LINT1)
          +                     set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS);
- -------- ---------}
          +         
- -------- ---------static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid)
- -------- ---------{
- -------- ---------    amd_iommu_rlookup_table[devid] = iommu;
+ ++++++++++++++++++    set_iommu_for_device(iommu, devid);
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Reads the device exclusion range from ACPI and initializes the IOMMU
+ ++++++++++++++++++ * with it.
+ ++++++++++++++++++ */
          +         static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m)
          +         {
          +             struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
          +         
          +             if (!(m->flags & IVMD_FLAG_EXCL_RANGE))
          +                     return;
          +         
          +             if (iommu) {
+ ++++++++++++++++++            /*
+ ++++++++++++++++++             * We can only configure exclusion ranges per IOMMU, not
+ ++++++++++++++++++             * per device. But we can enable the exclusion range per
+ ++++++++++++++++++             * device. This is done here.
+ ++++++++++++++++++             */
          +                     set_dev_entry_bit(m->devid, DEV_ENTRY_EX);
          +                     iommu->exclusion_start = m->range_start;
          +                     iommu->exclusion_length = m->range_length;
          +             }
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * This function reads some important data from the IOMMU PCI space and
+ ++++++++++++++++++ * initializes the driver data structure with it. It reads the hardware
+ ++++++++++++++++++ * capabilities and the first/last device entries
+ ++++++++++++++++++ */
          +         static void __init init_iommu_from_pci(struct amd_iommu *iommu)
          +         {
          +             int bus = PCI_BUS(iommu->devid);
          +             int dev = PCI_SLOT(iommu->devid);
          +             int fn  = PCI_FUNC(iommu->devid);
          +             int cap_ptr = iommu->cap_ptr;
          +             u32 range;
          +         
          +             iommu->cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_CAP_HDR_OFFSET);
          +         
          +             range = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET);
- -------- ---------    iommu->first_device = DEVID(MMIO_GET_BUS(range), MMIO_GET_FD(range));
- -------- ---------    iommu->last_device = DEVID(MMIO_GET_BUS(range), MMIO_GET_LD(range));
+ ++++++++++++++++++    iommu->first_device = calc_devid(MMIO_GET_BUS(range),
+ ++++++++++++++++++                                     MMIO_GET_FD(range));
+ ++++++++++++++++++    iommu->last_device = calc_devid(MMIO_GET_BUS(range),
+ ++++++++++++++++++                                    MMIO_GET_LD(range));
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Takes a pointer to an AMD IOMMU entry in the ACPI table and
+ ++++++++++++++++++ * initializes the hardware and our data structures with it.
+ ++++++++++++++++++ */
          +         static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
          +                                             struct ivhd_header *h)
          +         {
          +             u8 *p = (u8 *)h;
          +             u8 *end = p, flags = 0;
          +             u16 dev_i, devid = 0, devid_start = 0, devid_to = 0;
          +             u32 ext_flags = 0;
- -------- ---------    bool alias = 0;
+ ++++++++++++++++++    bool alias = false;
          +             struct ivhd_entry *e;
          +         
          +             /*
          +              * First set the recommended feature enable bits from ACPI
          +              * into the IOMMU control registers
          +              */
          +             h->flags & IVHD_FLAG_HT_TUN_EN ?
          +                     iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) :
          +                     iommu_feature_disable(iommu, CONTROL_HT_TUN_EN);
          +         
          +             h->flags & IVHD_FLAG_PASSPW_EN ?
          +                     iommu_feature_enable(iommu, CONTROL_PASSPW_EN) :
          +                     iommu_feature_disable(iommu, CONTROL_PASSPW_EN);
          +         
          +             h->flags & IVHD_FLAG_RESPASSPW_EN ?
          +                     iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) :
          +                     iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN);
          +         
          +             h->flags & IVHD_FLAG_ISOC_EN ?
          +                     iommu_feature_enable(iommu, CONTROL_ISOC_EN) :
          +                     iommu_feature_disable(iommu, CONTROL_ISOC_EN);
          +         
          +             /*
          +              * make IOMMU memory accesses cache coherent
          +              */
          +             iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
          +         
          +             /*
          +              * Done. Now parse the device entries
          +              */
          +             p += sizeof(struct ivhd_header);
          +             end += h->length;
          +         
          +             while (p < end) {
          +                     e = (struct ivhd_entry *)p;
          +                     switch (e->type) {
          +                     case IVHD_DEV_ALL:
          +                             for (dev_i = iommu->first_device;
          +                                             dev_i <= iommu->last_device; ++dev_i)
- -------- ---------                            set_dev_entry_from_acpi(dev_i, e->flags, 0);
+ ++++++++++++++++++                            set_dev_entry_from_acpi(iommu, dev_i,
+ ++++++++++++++++++                                                    e->flags, 0);
          +                             break;
          +                     case IVHD_DEV_SELECT:
          +                             devid = e->devid;
- -------- ---------                    set_dev_entry_from_acpi(devid, e->flags, 0);
+ ++++++++++++++++++                    set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
          +                             break;
          +                     case IVHD_DEV_SELECT_RANGE_START:
          +                             devid_start = e->devid;
          +                             flags = e->flags;
          +                             ext_flags = 0;
- -------- ---------                    alias = 0;
+ ++++++++++++++++++                    alias = false;
          +                             break;
          +                     case IVHD_DEV_ALIAS:
          +                             devid = e->devid;
          +                             devid_to = e->ext >> 8;
- -------- ---------                    set_dev_entry_from_acpi(devid, e->flags, 0);
+ ++++++++++++++++++                    set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
          +                             amd_iommu_alias_table[devid] = devid_to;
          +                             break;
          +                     case IVHD_DEV_ALIAS_RANGE:
          +                             devid_start = e->devid;
          +                             flags = e->flags;
          +                             devid_to = e->ext >> 8;
          +                             ext_flags = 0;
- -------- ---------                    alias = 1;
+ ++++++++++++++++++                    alias = true;
          +                             break;
          +                     case IVHD_DEV_EXT_SELECT:
          +                             devid = e->devid;
- -------- ---------                    set_dev_entry_from_acpi(devid, e->flags, e->ext);
+ ++++++++++++++++++                    set_dev_entry_from_acpi(iommu, devid, e->flags,
+ ++++++++++++++++++                                            e->ext);
          +                             break;
          +                     case IVHD_DEV_EXT_SELECT_RANGE:
          +                             devid_start = e->devid;
          +                             flags = e->flags;
          +                             ext_flags = e->ext;
- -------- ---------                    alias = 0;
+ ++++++++++++++++++                    alias = false;
          +                             break;
          +                     case IVHD_DEV_RANGE_END:
          +                             devid = e->devid;
          +                             for (dev_i = devid_start; dev_i <= devid; ++dev_i) {
          +                                     if (alias)
          +                                             amd_iommu_alias_table[dev_i] = devid_to;
- -------- ---------                            set_dev_entry_from_acpi(
+ ++++++++++++++++++                            set_dev_entry_from_acpi(iommu,
          +                                                     amd_iommu_alias_table[dev_i],
          +                                                     flags, ext_flags);
          +                             }
          +                             break;
          +                     default:
          +                             break;
          +                     }
          +         
          +                     p += 0x04 << (e->type >> 6);
          +             }
          +         }
          +         
+ ++++++++++++++++++/* Initializes the device->iommu mapping for the driver */
          +         static int __init init_iommu_devices(struct amd_iommu *iommu)
          +         {
          +             u16 i;
          +         
          +             for (i = iommu->first_device; i <= iommu->last_device; ++i)
          +                     set_iommu_for_device(iommu, i);
          +         
          +             return 0;
          +         }
          +         
          +         static void __init free_iommu_one(struct amd_iommu *iommu)
          +         {
          +             free_command_buffer(iommu);
          +             iommu_unmap_mmio_space(iommu);
          +         }
          +         
          +         static void __init free_iommu_all(void)
          +         {
          +             struct amd_iommu *iommu, *next;
          +         
          +             list_for_each_entry_safe(iommu, next, &amd_iommu_list, list) {
          +                     list_del(&iommu->list);
          +                     free_iommu_one(iommu);
          +                     kfree(iommu);
          +             }
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * This function glues the initialization of one IOMMU together and
+ ++++++++++++++++++ * also allocates the command buffer and programs the
+ ++++++++++++++++++ * hardware. It does NOT enable the IOMMU. This is done afterwards.
+ ++++++++++++++++++ */
          +         static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
          +         {
          +             spin_lock_init(&iommu->lock);
          +             list_add_tail(&iommu->list, &amd_iommu_list);
          +         
          +             /*
          +              * Copy data from ACPI table entry to the iommu struct
          +              */
          +             iommu->devid = h->devid;
          +             iommu->cap_ptr = h->cap_ptr;
          +             iommu->mmio_phys = h->mmio_phys;
          +             iommu->mmio_base = iommu_map_mmio_space(h->mmio_phys);
          +             if (!iommu->mmio_base)
          +                     return -ENOMEM;
          +         
          +             iommu_set_device_table(iommu);
          +             iommu->cmd_buf = alloc_command_buffer(iommu);
          +             if (!iommu->cmd_buf)
          +                     return -ENOMEM;
          +         
          +             init_iommu_from_pci(iommu);
          +             init_iommu_from_acpi(iommu, h);
          +             init_iommu_devices(iommu);
          +         
          +             return 0;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Iterates over all IOMMU entries in the ACPI table, allocates the
+ ++++++++++++++++++ * IOMMU structure and initializes it with init_iommu_one()
+ ++++++++++++++++++ */
          +         static int __init init_iommu_all(struct acpi_table_header *table)
          +         {
          +             u8 *p = (u8 *)table, *end = (u8 *)table;
          +             struct ivhd_header *h;
          +             struct amd_iommu *iommu;
          +             int ret;
          +         
- -------- ---------    INIT_LIST_HEAD(&amd_iommu_list);
- -------- ---------
          +             end += table->length;
          +             p += IVRS_HEADER_LENGTH;
          +         
          +             while (p < end) {
          +                     h = (struct ivhd_header *)p;
          +                     switch (*p) {
          +                     case ACPI_IVHD_TYPE:
          +                             iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL);
          +                             if (iommu == NULL)
          +                                     return -ENOMEM;
          +                             ret = init_iommu_one(iommu, h);
          +                             if (ret)
          +                                     return ret;
          +                             break;
          +                     default:
          +                             break;
          +                     }
          +                     p += h->length;
          +         
          +             }
          +             WARN_ON(p != end);
          +         
          +             return 0;
          +         }
          +         
+ ++++++++++++++++++/****************************************************************************
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * The next functions belong to the third pass of parsing the ACPI
+ ++++++++++++++++++ * table. In this last pass the memory mapping requirements are
+ ++++++++++++++++++ * gathered (like exclusion and unity mapping ranges).
+ ++++++++++++++++++ *
+ ++++++++++++++++++ ****************************************************************************/
+ ++++++++++++++++++
          +         static void __init free_unity_maps(void)
          +         {
          +             struct unity_map_entry *entry, *next;
          +         
          +             list_for_each_entry_safe(entry, next, &amd_iommu_unity_map, list) {
          +                     list_del(&entry->list);
          +                     kfree(entry);
          +             }
          +         }
          +         
+ ++++++++++++++++++/* called when we find an exclusion range definition in ACPI */
          +         static int __init init_exclusion_range(struct ivmd_header *m)
          +         {
          +             int i;
          +         
          +             switch (m->type) {
          +             case ACPI_IVMD_TYPE:
          +                     set_device_exclusion_range(m->devid, m);
          +                     break;
          +             case ACPI_IVMD_TYPE_ALL:
          +                     for (i = 0; i < amd_iommu_last_bdf; ++i)
          +                             set_device_exclusion_range(i, m);
          +                     break;
          +             case ACPI_IVMD_TYPE_RANGE:
          +                     for (i = m->devid; i <= m->aux; ++i)
          +                             set_device_exclusion_range(i, m);
          +                     break;
          +             default:
          +                     break;
          +             }
          +         
          +             return 0;
          +         }
          +         
+ ++++++++++++++++++/* called for unity map ACPI definition */
          +         static int __init init_unity_map_range(struct ivmd_header *m)
          +         {
           +             struct unity_map_entry *e = NULL;
          +         
          +             e = kzalloc(sizeof(*e), GFP_KERNEL);
          +             if (e == NULL)
          +                     return -ENOMEM;
          +         
          +             switch (m->type) {
          +             default:
          +             case ACPI_IVMD_TYPE:
          +                     e->devid_start = e->devid_end = m->devid;
          +                     break;
          +             case ACPI_IVMD_TYPE_ALL:
          +                     e->devid_start = 0;
          +                     e->devid_end = amd_iommu_last_bdf;
          +                     break;
          +             case ACPI_IVMD_TYPE_RANGE:
          +                     e->devid_start = m->devid;
          +                     e->devid_end = m->aux;
          +                     break;
          +             }
          +             e->address_start = PAGE_ALIGN(m->range_start);
          +             e->address_end = e->address_start + PAGE_ALIGN(m->range_length);
          +             e->prot = m->flags >> 1;
          +         
          +             list_add_tail(&e->list, &amd_iommu_unity_map);
          +         
          +             return 0;
          +         }
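
The unity map entry rounds the range start and length up to page boundaries
(PAGE_ALIGN rounds up) and derives the protection bits by shifting the IVMD
flags right by one. A standalone sketch with 4 KB pages assumed and made-up
input values:

        #include <stdint.h>
        #include <stdio.h>

        #define DEMO_PAGE_SIZE  4096ULL
        #define DEMO_PAGE_ALIGN(x) (((x) + DEMO_PAGE_SIZE - 1) & ~(DEMO_PAGE_SIZE - 1))

        int main(void)
        {
                uint64_t range_start = 0x100800, range_length = 0x1800;
                uint8_t flags = 0x07;

                uint64_t start = DEMO_PAGE_ALIGN(range_start);
                uint64_t end   = start + DEMO_PAGE_ALIGN(range_length);

                printf("unity map 0x%llx-0x%llx prot 0x%x\n",
                       (unsigned long long)start, (unsigned long long)end, flags >> 1);
                return 0;
        }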
          +         
+ ++++++++++++++++++/* iterates over all memory definitions we find in the ACPI table */
          +         static int __init init_memory_definitions(struct acpi_table_header *table)
          +         {
          +             u8 *p = (u8 *)table, *end = (u8 *)table;
          +             struct ivmd_header *m;
          +         
- -------- ---------    INIT_LIST_HEAD(&amd_iommu_unity_map);
- -------- ---------
          +             end += table->length;
          +             p += IVRS_HEADER_LENGTH;
          +         
          +             while (p < end) {
          +                     m = (struct ivmd_header *)p;
          +                     if (m->flags & IVMD_FLAG_EXCL_RANGE)
          +                             init_exclusion_range(m);
          +                     else if (m->flags & IVMD_FLAG_UNITY_MAP)
          +                             init_unity_map_range(m);
          +         
          +                     p += m->length;
          +             }
          +         
          +             return 0;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * This function finally enables all IOMMUs found in the system after
+ ++++++++++++++++++ * they have been initialized
+ ++++++++++++++++++ */
          +         static void __init enable_iommus(void)
          +         {
          +             struct amd_iommu *iommu;
          +         
          +             list_for_each_entry(iommu, &amd_iommu_list, list) {
          +                     iommu_set_exclusion_range(iommu);
          +                     iommu_enable(iommu);
          +             }
          +         }
          +         
          +         /*
          +          * Suspend/Resume support
           +          * disable suspend until real resume is implemented
          +          */
          +         
          +         static int amd_iommu_resume(struct sys_device *dev)
          +         {
          +             return 0;
          +         }
          +         
          +         static int amd_iommu_suspend(struct sys_device *dev, pm_message_t state)
          +         {
          +             return -EINVAL;
          +         }
          +         
          +         static struct sysdev_class amd_iommu_sysdev_class = {
          +             .name = "amd_iommu",
          +             .suspend = amd_iommu_suspend,
          +             .resume = amd_iommu_resume,
          +         };
          +         
          +         static struct sys_device device_amd_iommu = {
          +             .id = 0,
          +             .cls = &amd_iommu_sysdev_class,
          +         };
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * This is the core init function for AMD IOMMU hardware in the system.
+ ++++++++++++++++++ * This function is called from the generic x86 DMA layer initialization
+ ++++++++++++++++++ * code.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * This function basically parses the ACPI table for AMD IOMMU (IVRS)
+ ++++++++++++++++++ * three times:
+ ++++++++++++++++++ *
+ ++++++++++++++++++ *  1st pass) Find the highest PCI device id the driver has to handle.
+ ++++++++++++++++++ *            Based on this information the size of the data structures
+ ++++++++++++++++++ *            that need to be allocated is determined.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ *  2nd pass) Initialize the data structures just allocated with the
+ ++++++++++++++++++ *            information in the ACPI table about available AMD IOMMUs
+ ++++++++++++++++++ *            in the system. This pass also maps the PCI devices in the
+ ++++++++++++++++++ *            system to specific IOMMUs.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ *  3rd pass) After the basic data structures are allocated and
+ ++++++++++++++++++ *            initialized we update them with information about memory
+ ++++++++++++++++++ *            remapping requirements parsed out of the ACPI table in
+ ++++++++++++++++++ *            this last pass.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * After that the hardware is initialized and ready to go. In the last
+ ++++++++++++++++++ * step we do some Linux specific things like registering the driver in
+ ++++++++++++++++++ * the dma_ops interface and initializing the suspend/resume support
+ ++++++++++++++++++ * functions. Finally it prints some information about AMD IOMMUs and
+ ++++++++++++++++++ * the driver state and enables the hardware.
+ ++++++++++++++++++ */
          +         int __init amd_iommu_init(void)
          +         {
          +             int i, ret = 0;
          +         
          +         
          +             if (no_iommu) {
          +                     printk(KERN_INFO "AMD IOMMU disabled by kernel command line\n");
          +                     return 0;
          +             }
          +         
          +             if (!amd_iommu_detected)
          +                     return -ENODEV;
          +         
          +             /*
          +              * First parse ACPI tables to find the largest Bus/Dev/Func
           +              * we need to handle. Based on this information the shared data
           +              * structures for the IOMMUs in the system will be allocated.
          +              */
          +             if (acpi_table_parse("IVRS", find_last_devid_acpi) != 0)
          +                     return -ENODEV;
          +         
- -------- ---------    dev_table_size     = TBL_SIZE(DEV_TABLE_ENTRY_SIZE);
- -------- ---------    alias_table_size   = TBL_SIZE(ALIAS_TABLE_ENTRY_SIZE);
- -------- ---------    rlookup_table_size = TBL_SIZE(RLOOKUP_TABLE_ENTRY_SIZE);
+ ++++++++++++++++++    dev_table_size     = tbl_size(DEV_TABLE_ENTRY_SIZE);
+ ++++++++++++++++++    alias_table_size   = tbl_size(ALIAS_TABLE_ENTRY_SIZE);
+ ++++++++++++++++++    rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE);
          +         
          +             ret = -ENOMEM;
          +         
          +             /* Device table - directly used by all IOMMUs */
- -------- ---------    amd_iommu_dev_table = (void *)__get_free_pages(GFP_KERNEL,
+ ++++++++++++++++++    amd_iommu_dev_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
          +                                           get_order(dev_table_size));
          +             if (amd_iommu_dev_table == NULL)
          +                     goto out;
          +         
          +             /*
           +              * Alias table - maps PCI Bus/Dev/Func to the Bus/Dev/Func the
           +              * IOMMU sees for that device.
          +              */
          +             amd_iommu_alias_table = (void *)__get_free_pages(GFP_KERNEL,
          +                             get_order(alias_table_size));
          +             if (amd_iommu_alias_table == NULL)
          +                     goto free;
          +         
          +             /* IOMMU rlookup table - find the IOMMU for a specific device */
          +             amd_iommu_rlookup_table = (void *)__get_free_pages(GFP_KERNEL,
          +                             get_order(rlookup_table_size));
          +             if (amd_iommu_rlookup_table == NULL)
          +                     goto free;
          +         
          +             /*
          +              * Protection Domain table - maps devices to protection domains
          +              * This table has the same size as the rlookup_table
          +              */
- -------- ---------    amd_iommu_pd_table = (void *)__get_free_pages(GFP_KERNEL,
+ ++++++++++++++++++    amd_iommu_pd_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
          +                                          get_order(rlookup_table_size));
          +             if (amd_iommu_pd_table == NULL)
          +                     goto free;
          +         
- -------- ---------    amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages(GFP_KERNEL,
+ ++++++++++++++++++    amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages(
+ ++++++++++++++++++                                        GFP_KERNEL | __GFP_ZERO,
          +                                                 get_order(MAX_DOMAIN_ID/8));
          +             if (amd_iommu_pd_alloc_bitmap == NULL)
          +                     goto free;
          +         
          +             /*
- -------- ---------     * memory is allocated now; initialize the device table with all zeroes
- -------- ---------     * and let all alias entries point to itself
+ ++++++++++++++++++     * let all alias entries point to themselves
          +              */
- -------- ---------    memset(amd_iommu_dev_table, 0, dev_table_size);
          +             for (i = 0; i < amd_iommu_last_bdf; ++i)
          +                     amd_iommu_alias_table[i] = i;
          +         
- -------- ---------    memset(amd_iommu_pd_table, 0, rlookup_table_size);
- -------- ---------    memset(amd_iommu_pd_alloc_bitmap, 0, MAX_DOMAIN_ID / 8);
- -------- ---------
          +             /*
           +              * never allocate domain 0 because it's used as the non-allocated and
          +              * error value placeholder
          +              */
          +             amd_iommu_pd_alloc_bitmap[0] = 1;
          +         
          +             /*
          +              * now the data structures are allocated and basically initialized
          +              * start the real acpi table scan
          +              */
          +             ret = -ENODEV;
          +             if (acpi_table_parse("IVRS", init_iommu_all) != 0)
          +                     goto free;
          +         
          +             if (acpi_table_parse("IVRS", init_memory_definitions) != 0)
          +                     goto free;
          +         
          +             ret = amd_iommu_init_dma_ops();
          +             if (ret)
          +                     goto free;
          +         
          +             ret = sysdev_class_register(&amd_iommu_sysdev_class);
          +             if (ret)
          +                     goto free;
          +         
          +             ret = sysdev_register(&device_amd_iommu);
          +             if (ret)
          +                     goto free;
          +         
          +             enable_iommus();
          +         
          +             printk(KERN_INFO "AMD IOMMU: aperture size is %d MB\n",
          +                             (1 << (amd_iommu_aperture_order-20)));
          +         
          +             printk(KERN_INFO "AMD IOMMU: device isolation ");
          +             if (amd_iommu_isolate)
          +                     printk("enabled\n");
          +             else
          +                     printk("disabled\n");
          +         
          +         out:
          +             return ret;
          +         
          +         free:
- -------- ---------    if (amd_iommu_pd_alloc_bitmap)
- -------- ---------            free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, 1);
+ ++++++++++++++++++    free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, 1);
          +         
- -------- ---------    if (amd_iommu_pd_table)
- -------- ---------            free_pages((unsigned long)amd_iommu_pd_table,
- -------- ---------                            get_order(rlookup_table_size));
+ ++++++++++++++++++    free_pages((unsigned long)amd_iommu_pd_table,
+ ++++++++++++++++++               get_order(rlookup_table_size));
          +         
- -------- ---------    if (amd_iommu_rlookup_table)
- -------- ---------            free_pages((unsigned long)amd_iommu_rlookup_table,
- -------- ---------                            get_order(rlookup_table_size));
+ ++++++++++++++++++    free_pages((unsigned long)amd_iommu_rlookup_table,
+ ++++++++++++++++++               get_order(rlookup_table_size));
          +         
- -------- ---------    if (amd_iommu_alias_table)
- -------- ---------            free_pages((unsigned long)amd_iommu_alias_table,
- -------- ---------                            get_order(alias_table_size));
+ ++++++++++++++++++    free_pages((unsigned long)amd_iommu_alias_table,
+ ++++++++++++++++++               get_order(alias_table_size));
          +         
- -------- ---------    if (amd_iommu_dev_table)
- -------- ---------            free_pages((unsigned long)amd_iommu_dev_table,
- -------- ---------                            get_order(dev_table_size));
+ ++++++++++++++++++    free_pages((unsigned long)amd_iommu_dev_table,
+ ++++++++++++++++++               get_order(dev_table_size));
          +         
          +             free_iommu_all();
          +         
          +             free_unity_maps();
          +         
          +             goto out;
          +         }
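As an aside on the allocations above: __get_free_pages() hands out 2^order pages, so every table size is first rounded up through get_order(). A minimal userspace sketch of that rounding, assuming 4 KiB pages (the sample sizes below are made up for illustration):

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

/* Smallest order such that (PAGE_SIZE << order) covers size (size > 0). */
static int sketch_get_order(unsigned long size)
{
        int order = 0;

        size = (size - 1) >> PAGE_SHIFT;
        while (size) {
                order++;
                size >>= 1;
        }
        return order;
}

int main(void)
{
        /* hypothetical table sizes, just to show the rounding */
        unsigned long sizes[] = { 8192, 65536, 1UL << 19 };
        unsigned int i;

        for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
                int order = sketch_get_order(sizes[i]);
                printf("size %lu -> order %d (%lu bytes allocated)\n",
                       sizes[i], order, PAGE_SIZE << order);
        }
        return 0;
}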
          +         
+ ++++++++++++++++++/****************************************************************************
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * Early detect code. This code runs at IOMMU detection time in the DMA
+ ++++++++++++++++++ * layer. It simply checks whether an IVRS ACPI table is present to
+ ++++++++++++++++++ * detect AMD IOMMUs
+ ++++++++++++++++++ *
+ ++++++++++++++++++ ****************************************************************************/
          +         static int __init early_amd_iommu_detect(struct acpi_table_header *table)
          +         {
          +             return 0;
          +         }
          +         
          +         void __init amd_iommu_detect(void)
          +         {
- -------- ---------    if (swiotlb || no_iommu || iommu_detected)
+ ++++++++++++++++++    if (swiotlb || no_iommu || (iommu_detected && !gart_iommu_aperture))
          +                     return;
          +         
          +             if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) {
          +                     iommu_detected = 1;
          +                     amd_iommu_detected = 1;
          +         #ifdef CONFIG_GART_IOMMU
          +                     gart_iommu_aperture_disabled = 1;
          +                     gart_iommu_aperture = 0;
          +         #endif
          +             }
          +         }
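The guard at the top of amd_iommu_detect() encodes a precedence between DMA backends. A tiny sketch of that check with plain booleans standing in for the kernel globals (the names below are illustrative only, not the kernel API):

#include <stdbool.h>
#include <stdio.h>

/* Stand-ins for swiotlb, no_iommu, iommu_detected, gart_iommu_aperture. */
static bool should_try_amd_iommu_detect(bool swiotlb, bool no_iommu,
                                        bool iommu_detected, bool gart_aperture)
{
        /* Skip detection if another DMA backend is already in charge,
         * unless the only thing detected so far is the GART aperture. */
        if (swiotlb || no_iommu || (iommu_detected && !gart_aperture))
                return false;
        return true;
}

int main(void)
{
        printf("%d\n", should_try_amd_iommu_detect(false, false, true, true));  /* 1 */
        printf("%d\n", should_try_amd_iommu_detect(false, false, true, false)); /* 0 */
        return 0;
}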
          +         
+ ++++++++++++++++++/****************************************************************************
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * Parsing functions for the AMD IOMMU specific kernel command line
+ ++++++++++++++++++ * options.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ ****************************************************************************/
+ ++++++++++++++++++
          +         static int __init parse_amd_iommu_options(char *str)
          +         {
          +             for (; *str; ++str) {
          +                     if (strcmp(str, "isolate") == 0)
          +                             amd_iommu_isolate = 1;
          +             }
          +         
          +             return 1;
          +         }
          +         
          +         static int __init parse_amd_iommu_size_options(char *str)
          +         {
- -------- ---------    for (; *str; ++str) {
- -------- ---------            if (strcmp(str, "32M") == 0)
- -------- ---------                    amd_iommu_aperture_order = 25;
- -------- ---------            if (strcmp(str, "64M") == 0)
- -------- ---------                    amd_iommu_aperture_order = 26;
- -------- ---------            if (strcmp(str, "128M") == 0)
- -------- ---------                    amd_iommu_aperture_order = 27;
- -------- ---------            if (strcmp(str, "256M") == 0)
- -------- ---------                    amd_iommu_aperture_order = 28;
- -------- ---------            if (strcmp(str, "512M") == 0)
- -------- ---------                    amd_iommu_aperture_order = 29;
- -------- ---------            if (strcmp(str, "1G") == 0)
- -------- ---------                    amd_iommu_aperture_order = 30;
- -------- ---------    }
+ ++++++++++++++++++    unsigned order = PAGE_SHIFT + get_order(memparse(str, &str));
+ ++++++++++++++++++
+ ++++++++++++++++++    if ((order > 24) && (order < 31))
+ ++++++++++++++++++            amd_iommu_aperture_order = order;
          +         
          +             return 1;
          +         }
          +         
          +         __setup("amd_iommu=", parse_amd_iommu_options);
          +         __setup("amd_iommu_size=", parse_amd_iommu_size_options);
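The rewritten amd_iommu_size= parser converts an arbitrary size string into a page order and only accepts apertures between 32 MB and 1 GB. A userspace sketch of the same arithmetic, assuming 4 KiB pages and a simplified memparse-style helper (the default order of 26 is an assumption for the example):

#include <stdio.h>
#include <stdlib.h>

#define PAGE_SHIFT 12

/* Smallest order such that (1 << (PAGE_SHIFT + order)) covers size. */
static int order_for_size(unsigned long size)
{
        int order = 0;

        size = (size - 1) >> PAGE_SHIFT;
        while (size) {
                order++;
                size >>= 1;
        }
        return order;
}

/* memparse-like helper: number with optional K/M/G suffix. */
static unsigned long parse_size(const char *str)
{
        char *end;
        unsigned long val = strtoul(str, &end, 0);

        switch (*end) {
        case 'G': case 'g': val <<= 10; /* fall through */
        case 'M': case 'm': val <<= 10; /* fall through */
        case 'K': case 'k': val <<= 10;
        }
        return val;
}

int main(void)
{
        const char *args[] = { "32M", "64M", "1G", "4K" };
        unsigned int default_order = 26; /* assumed default: 64 MB aperture */
        unsigned int i;

        for (i = 0; i < sizeof(args) / sizeof(args[0]); i++) {
                unsigned int order = PAGE_SHIFT + order_for_size(parse_size(args[i]));
                unsigned int used = (order > 24 && order < 31) ? order : default_order;

                printf("amd_iommu_size=%s -> order %u, aperture order used: %u\n",
                       args[i], order, used);
        }
        return 0;
}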
@@@@@@@@@@@@@@@@@@@@@ -75,17 -75,17 -75,17 -75,17 -75,17 -75,17 -75,17 -75,17 -75,17 -75,17 -74,7 -75,17 -75,17 -75,17 -75,17 -75,17 -75,17 -75,17 -75,17 -75,17 +75,17 @@@@@@@@@@@@@@@@@@@@@ char system_vectors[NR_VECTORS] = { [0 
                    /*
                     * Debug level, exported for io_apic.c
                     */
-- -----------------int apic_verbosity;
++ +++++++++++++++++unsigned int apic_verbosity;
          +         
          +         int pic_mode;
          +         
          +         /* Have we found an MP table */
          +         int smp_found_config;
          +         
          +         static struct resource lapic_resource = {
          +             .name = "Local APIC",
          +             .flags = IORESOURCE_MEM | IORESOURCE_BUSY,
          +         };
                    
                    static unsigned int calibration_result;
                    
@@@@@@@@@@@@@@@@@@@@@ -543,22 -543,22 -514,55 -543,22 -543,22 -543,22 -543,22 -543,22 -543,22 -543,22 -532,22 -543,22 -543,22 -543,22 -543,22 -543,22 -543,22 -543,22 -543,22 -543,22 +514,55 @@@@@@@@@@@@@@@@@@@@@ static int __init calibrate_APIC_clock(
                        if (!local_apic_timer_verify_ok) {
                                printk(KERN_WARNING
                                       "APIC timer disabled due to verification failure.\n");
++ +++++++++++++++++                    return -1;
++ +++++++++++++++++    }
++ +++++++++++++++++
++ +++++++++++++++++    return 0;
++ +++++++++++++++++}
++ +++++++++++++++++
++ +++++++++++++++++/*
++ +++++++++++++++++ * Setup the boot APIC
++ +++++++++++++++++ *
++ +++++++++++++++++ * Calibrate and verify the result.
++ +++++++++++++++++ */
++ +++++++++++++++++void __init setup_boot_APIC_clock(void)
++ +++++++++++++++++{
++ +++++++++++++++++    /*
++ +++++++++++++++++     * The local apic timer can be disabled via the kernel
++ +++++++++++++++++     * commandline or from the CPU detection code. Register the lapic
++ +++++++++++++++++     * timer as a dummy clock event source on SMP systems, so the
++ +++++++++++++++++     * broadcast mechanism is used. On UP systems simply ignore it.
++ +++++++++++++++++     */
++ +++++++++++++++++    if (local_apic_timer_disabled) {
                                /* No broadcast on UP ! */
-- -----------------            if (num_possible_cpus() == 1)
-- -----------------                    return;
-- -----------------    } else {
-- -----------------            /*
-- -----------------             * If nmi_watchdog is set to IO_APIC, we need the
-- -----------------             * PIT/HPET going.  Otherwise register lapic as a dummy
-- -----------------             * device.
-- -----------------             */
-- -----------------            if (nmi_watchdog != NMI_IO_APIC)
-- -----------------                    lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
-- -----------------            else
-- -----------------                    printk(KERN_WARNING "APIC timer registered as dummy,"
-- ------- ---------                            " due to nmi_watchdog=%d!\n", nmi_watchdog);
          -                                    " due to nmi_watchdog=1!\n");
++ +++++++++++++++++            if (num_possible_cpus() > 1) {
++ +++++++++++++++++                    lapic_clockevent.mult = 1;
++ +++++++++++++++++                    setup_APIC_timer();
++ +++++++++++++++++            }
++ +++++++++++++++++            return;
      ++  + +   +       }
      ++  + +   +   
++ +++++++++++++++++    apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
++ +++++++++++++++++                "calibrating APIC timer ...\n");
++ +++++++++++++++++
++ +++++++++++++++++    if (calibrate_APIC_clock()) {
++ +++++++++++++++++            /* No broadcast on UP ! */
++ +++++++++++++++++            if (num_possible_cpus() > 1)
++ +++++++++++++++++                    setup_APIC_timer();
++ +++++++++++++++++            return;
++ +++  ++ + +++ +++    }
++ +++  ++ + +++ +++
++ +++++++++++++++++    /*
++ +++++++++++++++++     * If nmi_watchdog is set to IO_APIC, we need the
++ +++++++++++++++++     * PIT/HPET going.  Otherwise register lapic as a dummy
++ +++++++++++++++++     * device.
++ +++++++++++++++++     */
++ +++++++++++++++++    if (nmi_watchdog != NMI_IO_APIC)
++ +++++++++++++++++            lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
++ +++++++++++++++++    else
++ +++++++++++++++++            printk(KERN_WARNING "APIC timer registered as dummy,"
++ +++++++++++++++++                    " due to nmi_watchdog=%d!\n", nmi_watchdog);
++ +++++++++++++++++
                        /* Setup the lapic or request the broadcast */
                        setup_APIC_timer();
                    }
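For readability, here is the decision flow of the reworked setup_boot_APIC_clock() as a standalone sketch; the flags and messages are stand-ins for the kernel state, not the real API:

#include <stdbool.h>
#include <stdio.h>

static bool lapic_timer_disabled;   /* command line / CPU detection        */
static bool calibration_failed;     /* calibrate_APIC_clock() verification */
static int  possible_cpus = 4;
static bool nmi_watchdog_is_ioapic; /* nmi_watchdog == NMI_IO_APIC         */

static void setup_timer(const char *how) { printf("timer: %s\n", how); }

static void boot_apic_clock_sketch(void)
{
        if (lapic_timer_disabled) {
                /* SMP still needs a (dummy) device so broadcast works. */
                if (possible_cpus > 1)
                        setup_timer("dummy clockevent, broadcast via PIT/HPET");
                return; /* UP: simply ignore the local APIC timer */
        }

        if (calibration_failed) {
                if (possible_cpus > 1)
                        setup_timer("dummy clockevent after failed calibration");
                return;
        }

        /* With the IO-APIC NMI watchdog the PIT/HPET must keep running,
         * so the lapic stays registered as a dummy device. */
        if (nmi_watchdog_is_ioapic)
                setup_timer("dummy clockevent, keeping PIT/HPET for the watchdog");
        else
                setup_timer("real local APIC clockevent");
}

int main(void)
{
        boot_apic_clock_sketch();
        return 0;
}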
                    
                    int __init APIC_init_uniprocessor(void)
                    {
---- ----- ---------    if (disable_apic)
---- ----- ---------            clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
---- ----- ---------
          -             if (enable_local_apic < 0)
          -                     clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
          -         
                        if (!smp_found_config && !cpu_has_apic)
                                return -1;
                    
                         * The reschedule interrupt is a CPU-to-CPU reschedule-helper
                         * IPI, driven by wakeup.
                         */
          -             set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
          +             alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
                    
                        /* IPI for invalidation */
          -             set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
          +             alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
                    
                        /* IPI for generic function call */
          -             set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
          +             alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
      ++  + +   +   
      ++  + +   +       /* IPI for single call function */
      ++  + +   +       set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
      ++  + +   +                               call_function_single_interrupt);
                    }
                    #endif
                    
@@@@@@@@@@@@@@@@@@@@@ -1699,8 -1699,8 -1703,8 -1699,8 -1696,8 -1699,8 -1695,8 -1695,8 -1699,8 -1699,8 -1710,8 -1699,8 -1695,8 -1699,8 -1699,8 -1699,8 -1695,8 -1699,8 -1699,8 -1699,8 +1700,8 @@@@@@@@@@@@@@@@@@@@@ early_param("lapic", parse_lapic)
                    
                    static int __init parse_nolapic(char *arg)
                    {
          -             enable_local_apic = -1;
          -             clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
          +             disable_apic = 1;
---- ----- ---------    clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
++++ +++++++++++++++    setup_clear_cpu_cap(X86_FEATURE_APIC);
                        return 0;
                    }
                    early_param("nolapic", parse_nolapic);
@@@@@@@@@@@@@@@@@@@@@ -54,10 -54,10 -54,10 -54,10 -54,10 -54,10 -54,10 -54,10 -54,10 -54,10 -54,7 -54,10 -54,10 -54,10 -54,10 -54,10 -54,10 -54,10 -54,10 -54,10 +54,10 @@@@@@@@@@@@@@@@@@@@@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_o
                    /*
                     * Debug level, exported for io_apic.c
                     */
-- -----------------int apic_verbosity;
++ +++++++++++++++++unsigned int apic_verbosity;
          +         
          +         /* Have we found an MP table */
          +         int smp_found_config;
                    
                    static struct resource lapic_resource = {
                        .name = "Local APIC",
index 7b8cc72,7b8cc72,7b8cc72,2a4475b,daee611,7b8cc72,7b8cc72,7518502,7b8cc72,7b8cc72,0000000,7b8cc72,7b8cc72,7b8cc72,7b8cc72,7b8cc72,36537ab,7b8cc72,7b8cc72,736f50f..dd6e3f1
mode 100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,000000,100644,100644,100644,100644,100644,100644,100644,100644,100644..100644
--- /dev/null
          +         #include <linux/init.h>
          +         #include <linux/kernel.h>
          +         #include <linux/sched.h>
          +         #include <linux/string.h>
          +         #include <linux/bootmem.h>
          +         #include <linux/bitops.h>
          +         #include <linux/module.h>
          +         #include <linux/kgdb.h>
          +         #include <linux/topology.h>
--- ------ ---------#include <linux/string.h>
          +         #include <linux/delay.h>
          +         #include <linux/smp.h>
--- ------ ---------#include <linux/module.h>
          +         #include <linux/percpu.h>
--- ------ ---------#include <asm/processor.h>
          +         #include <asm/i387.h>
          +         #include <asm/msr.h>
          +         #include <asm/io.h>
+++++++++++++++++++ #include <asm/linkage.h>
          +         #include <asm/mmu_context.h>
          +         #include <asm/mtrr.h>
          +         #include <asm/mce.h>
          +         #include <asm/pat.h>
          +         #include <asm/numa.h>
          +         #ifdef CONFIG_X86_LOCAL_APIC
          +         #include <asm/mpspec.h>
          +         #include <asm/apic.h>
          +         #include <mach_apic.h>
          +         #endif
          +         #include <asm/pda.h>
          +         #include <asm/pgtable.h>
          +         #include <asm/processor.h>
          +         #include <asm/desc.h>
          +         #include <asm/atomic.h>
          +         #include <asm/proto.h>
          +         #include <asm/sections.h>
          +         #include <asm/setup.h>
          +         #include <asm/genapic.h>
          +         
          +         #include "cpu.h"
          +         
          +         /* We need valid kernel segments for data and code in long mode too;
          +          * IRET will check the segment types.  kkeil 2000/10/28
          +          * Also, sysret mandates a special GDT layout.
          +          */
          +         /* The TLS descriptors are currently at a different place compared to i386.
          +            Hopefully nobody expects them at a fixed place (Wine?) */
          +         DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = {
          +             [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
          +             [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } },
          +             [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } },
          +             [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } },
          +             [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } },
          +             [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } },
          +         } };
          +         EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
          +         
          +         __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
          +         
          +         /* Current gdt points %fs at the "master" per-cpu area: after this,
          +          * it's on the real one. */
          +         void switch_to_new_gdt(void)
          +         {
          +             struct desc_ptr gdt_descr;
          +         
          +             gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
          +             gdt_descr.size = GDT_SIZE - 1;
          +             load_gdt(&gdt_descr);
          +         }
          +         
          +         struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
          +         
          +         static void __cpuinit default_init(struct cpuinfo_x86 *c)
          +         {
          +             display_cacheinfo(c);
          +         }
          +         
          +         static struct cpu_dev __cpuinitdata default_cpu = {
          +             .c_init = default_init,
          +             .c_vendor = "Unknown",
          +         };
          +         static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu;
          +         
          +         int __cpuinit get_model_name(struct cpuinfo_x86 *c)
          +         {
          +             unsigned int *v;
          +         
          +             if (c->extended_cpuid_level < 0x80000004)
          +                     return 0;
          +         
          +             v = (unsigned int *) c->x86_model_id;
          +             cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
          +             cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
          +             cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
          +             c->x86_model_id[48] = 0;
          +             return 1;
          +         }
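get_model_name() just glues together the three CPUID brand-string leaves. The same thing can be tried from userspace with GCC/Clang's <cpuid.h> helper (__get_cpuid is real; the rest of the program is illustration, x86 only):

#include <stdio.h>
#include <string.h>
#include <cpuid.h>

int main(void)
{
        unsigned int v[12];
        char brand[49];

        /* Leaves 0x80000002..0x80000004 each return 16 bytes of the name. */
        if (!__get_cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]) ||
            !__get_cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]) ||
            !__get_cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11])) {
                fprintf(stderr, "brand string leaves not supported\n");
                return 1;
        }

        memcpy(brand, v, 48);
        brand[48] = 0;
        printf("x86_model_id: %s\n", brand);
        return 0;
}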
          +         
          +         
          +         void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
          +         {
       -        -       unsigned int n, dummy, eax, ebx, ecx, edx;
       +  +     +       unsigned int n, dummy, ebx, ecx, edx;
          +         
          +             n = c->extended_cpuid_level;
          +         
          +             if (n >= 0x80000005) {
          +                     cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
          +                     printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), "
          +                            "D cache %dK (%d bytes/line)\n",
          +                            edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
          +                     c->x86_cache_size = (ecx>>24) + (edx>>24);
          +                     /* On K8 L1 TLB is inclusive, so don't count it */
          +                     c->x86_tlbsize = 0;
          +             }
          +         
          +             if (n >= 0x80000006) {
          +                     cpuid(0x80000006, &dummy, &ebx, &ecx, &edx);
          +                     ecx = cpuid_ecx(0x80000006);
          +                     c->x86_cache_size = ecx >> 16;
          +                     c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff);
          +         
          +                     printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
          +                     c->x86_cache_size, ecx & 0xFF);
          +             }
       -        -       if (n >= 0x80000008) {
       -        -               cpuid(0x80000008, &eax, &dummy, &dummy, &dummy);
       -        -               c->x86_virt_bits = (eax >> 8) & 0xff;
       -        -               c->x86_phys_bits = eax & 0xff;
       -        -       }
          +         }
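The fields display_cacheinfo() decodes: for leaf 0x80000005, EDX/ECX carry the L1 I/D cache size in the top byte (in KB) and the line size in the low byte; for leaf 0x80000006, ECX[31:16] is the L2 size in KB. A small sketch with made-up register values:

#include <stdio.h>

static void decode_l1(unsigned int ecx, unsigned int edx)
{
        printf("L1 I: %uK, %u bytes/line; L1 D: %uK, %u bytes/line\n",
               edx >> 24, edx & 0xff, ecx >> 24, ecx & 0xff);
}

static void decode_l2(unsigned int ecx)
{
        printf("L2: %uK, %u bytes/line\n", ecx >> 16, ecx & 0xff);
}

int main(void)
{
        /* made-up register contents in the 0x80000005/0x80000006 layout */
        decode_l1(0x40040140, 0x40040140); /* 64K caches, 64-byte lines */
        decode_l2(0x02006140);             /* 512K L2, 64-byte line     */
        return 0;
}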
          +         
          +         void __cpuinit detect_ht(struct cpuinfo_x86 *c)
          +         {
          +         #ifdef CONFIG_SMP
          +             u32 eax, ebx, ecx, edx;
          +             int index_msb, core_bits;
          +         
          +             cpuid(1, &eax, &ebx, &ecx, &edx);
          +         
          +         
          +             if (!cpu_has(c, X86_FEATURE_HT))
          +                     return;
          +             if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
          +                     goto out;
          +         
          +             smp_num_siblings = (ebx & 0xff0000) >> 16;
          +         
          +             if (smp_num_siblings == 1) {
          +                     printk(KERN_INFO  "CPU: Hyper-Threading is disabled\n");
          +             } else if (smp_num_siblings > 1) {
          +         
          +                     if (smp_num_siblings > NR_CPUS) {
          +                             printk(KERN_WARNING "CPU: Unsupported number of "
          +                                    "siblings %d", smp_num_siblings);
          +                             smp_num_siblings = 1;
          +                             return;
          +                     }
          +         
          +                     index_msb = get_count_order(smp_num_siblings);
          +                     c->phys_proc_id = phys_pkg_id(index_msb);
          +         
          +                     smp_num_siblings = smp_num_siblings / c->x86_max_cores;
          +         
          +                     index_msb = get_count_order(smp_num_siblings);
          +         
          +                     core_bits = get_count_order(c->x86_max_cores);
          +         
          +                     c->cpu_core_id = phys_pkg_id(index_msb) &
          +                                                    ((1 << core_bits) - 1);
          +             }
          +         out:
          +             if ((c->x86_max_cores * smp_num_siblings) > 1) {
          +                     printk(KERN_INFO  "CPU: Physical Processor ID: %d\n",
          +                            c->phys_proc_id);
          +                     printk(KERN_INFO  "CPU: Processor Core ID: %d\n",
          +                            c->cpu_core_id);
          +             }
          +         
          +         #endif
          +         }
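detect_ht() peels the initial APIC id apart into package and core fields using get_count_order() (log2 rounded up). A standalone sketch of that arithmetic with an assumed APIC id and assumed sibling/core counts:

#include <stdio.h>

/* get_count_order(): log2 of count, rounded up (count > 0). */
static int count_order(unsigned int count)
{
        int order = 0;

        while ((1u << order) < count)
                order++;
        return order;
}

int main(void)
{
        unsigned int apicid = 0x15;      /* assumed initial APIC id         */
        unsigned int siblings = 2;       /* threads per core (after divide) */
        unsigned int max_cores = 4;      /* cores per package               */
        int index_msb, core_bits;

        /* package id: strip the bits that address cores and threads */
        index_msb = count_order(siblings * max_cores);
        printf("phys_proc_id: %u\n", apicid >> index_msb);

        /* core id: strip the thread bits, then mask to the core field */
        index_msb = count_order(siblings);
        core_bits = count_order(max_cores);
        printf("cpu_core_id:  %u\n",
               (apicid >> index_msb) & ((1u << core_bits) - 1));
        return 0;
}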
          +         
          +         static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
          +         {
          +             char *v = c->x86_vendor_id;
          +             int i;
          +             static int printed;
          +         
          +             for (i = 0; i < X86_VENDOR_NUM; i++) {
          +                     if (cpu_devs[i]) {
          +                             if (!strcmp(v, cpu_devs[i]->c_ident[0]) ||
          +                                 (cpu_devs[i]->c_ident[1] &&
          +                                 !strcmp(v, cpu_devs[i]->c_ident[1]))) {
          +                                     c->x86_vendor = i;
          +                                     this_cpu = cpu_devs[i];
          +                                     return;
          +                             }
          +                     }
          +             }
          +             if (!printed) {
          +                     printed++;
          +                     printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n");
          +                     printk(KERN_ERR "CPU: Your system may be unstable.\n");
          +             }
          +             c->x86_vendor = X86_VENDOR_UNKNOWN;
          +         }
          +         
          +         static void __init early_cpu_support_print(void)
          +         {
          +             int i,j;
          +             struct cpu_dev *cpu_devx;
          +         
          +             printk("KERNEL supported cpus:\n");
          +             for (i = 0; i < X86_VENDOR_NUM; i++) {
          +                     cpu_devx = cpu_devs[i];
          +                     if (!cpu_devx)
          +                             continue;
          +                     for (j = 0; j < 2; j++) {
          +                             if (!cpu_devx->c_ident[j])
          +                                     continue;
          +                             printk("  %s %s\n", cpu_devx->c_vendor,
          +                                     cpu_devx->c_ident[j]);
          +                     }
          +             }
          +         }
          +         
          +         static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c);
          +         
          +         void __init early_cpu_init(void)
          +         {
          +                 struct cpu_vendor_dev *cvdev;
          +         
          +                 for (cvdev = __x86cpuvendor_start ;
          +                      cvdev < __x86cpuvendor_end   ;
          +                      cvdev++)
          +                         cpu_devs[cvdev->vendor] = cvdev->cpu_dev;
          +             early_cpu_support_print();
          +             early_identify_cpu(&boot_cpu_data);
          +         }
          +         
          +         /* Do some early cpuid on the boot CPU to get some parameters that are
          +            needed before check_bugs. Everything advanced is in identify_cpu
          +            below. */
          +         static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
          +         {
          +             u32 tfms, xlvl;
          +         
          +             c->loops_per_jiffy = loops_per_jiffy;
          +             c->x86_cache_size = -1;
          +             c->x86_vendor = X86_VENDOR_UNKNOWN;
          +             c->x86_model = c->x86_mask = 0; /* So far unknown... */
          +             c->x86_vendor_id[0] = '\0'; /* Unset */
          +             c->x86_model_id[0] = '\0';  /* Unset */
          +             c->x86_clflush_size = 64;
          +             c->x86_cache_alignment = c->x86_clflush_size;
          +             c->x86_max_cores = 1;
          +             c->x86_coreid_bits = 0;
          +             c->extended_cpuid_level = 0;
          +             memset(&c->x86_capability, 0, sizeof c->x86_capability);
          +         
          +             /* Get vendor name */
          +             cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
          +                   (unsigned int *)&c->x86_vendor_id[0],
          +                   (unsigned int *)&c->x86_vendor_id[8],
          +                   (unsigned int *)&c->x86_vendor_id[4]);
          +         
          +             get_cpu_vendor(c);
          +         
          +             /* Initialize the standard set of capabilities */
          +             /* Note that the vendor-specific code below might override */
          +         
          +             /* Intel-defined flags: level 0x00000001 */
          +             if (c->cpuid_level >= 0x00000001) {
          +                     __u32 misc;
          +                     cpuid(0x00000001, &tfms, &misc, &c->x86_capability[4],
          +                           &c->x86_capability[0]);
          +                     c->x86 = (tfms >> 8) & 0xf;
          +                     c->x86_model = (tfms >> 4) & 0xf;
          +                     c->x86_mask = tfms & 0xf;
          +                     if (c->x86 == 0xf)
          +                             c->x86 += (tfms >> 20) & 0xff;
          +                     if (c->x86 >= 0x6)
          +                             c->x86_model += ((tfms >> 16) & 0xF) << 4;
          +                     if (test_cpu_cap(c, X86_FEATURE_CLFLSH))
          +                             c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
          +             } else {
          +                     /* Have CPUID level 0 only - unheard of */
          +                     c->x86 = 4;
          +             }
          +         
          +             c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff;
          +         #ifdef CONFIG_SMP
          +             c->phys_proc_id = c->initial_apicid;
          +         #endif
          +             /* AMD-defined flags: level 0x80000001 */
          +             xlvl = cpuid_eax(0x80000000);
          +             c->extended_cpuid_level = xlvl;
          +             if ((xlvl & 0xffff0000) == 0x80000000) {
          +                     if (xlvl >= 0x80000001) {
          +                             c->x86_capability[1] = cpuid_edx(0x80000001);
          +                             c->x86_capability[6] = cpuid_ecx(0x80000001);
          +                     }
          +                     if (xlvl >= 0x80000004)
          +                             get_model_name(c); /* Default name */
          +             }
          +         
          +             /* Transmeta-defined flags: level 0x80860001 */
          +             xlvl = cpuid_eax(0x80860000);
          +             if ((xlvl & 0xffff0000) == 0x80860000) {
          +                     /* Don't set x86_cpuid_level here for now, to avoid confusion. */
          +                     if (xlvl >= 0x80860001)
          +                             c->x86_capability[2] = cpuid_edx(0x80860001);
          +             }
          +         
---- ----- ---------    c->extended_cpuid_level = cpuid_eax(0x80000000);
          +             if (c->extended_cpuid_level >= 0x80000007)
          +                     c->x86_power = cpuid_edx(0x80000007);
          +         
                -       /* Assume all 64-bit CPUs support 32-bit syscall */
                -       set_cpu_cap(c, X86_FEATURE_SYSCALL32);
       +  +     +       if (c->extended_cpuid_level >= 0x80000008) {
       +  +     +               u32 eax = cpuid_eax(0x80000008);
       +  +     +   
       +  +     +               c->x86_virt_bits = (eax >> 8) & 0xff;
       +  +     +               c->x86_phys_bits = eax & 0xff;
       +  +     +       }
       +  +         
------- -- ----- --     /* Assume all 64-bit CPUs support 32-bit syscall */
------- -- ----- --     set_cpu_cap(c, X86_FEATURE_SYSCALL32);
------- -- ----- -- 
          +             if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
          +                 cpu_devs[c->x86_vendor]->c_early_init)
          +                     cpu_devs[c->x86_vendor]->c_early_init(c);
          +         
          +             validate_pat_support(c);
---- ----- ---------
---- ----- ---------    /* early_param could clear that, but recall get it set again */
---- ----- ---------    if (disable_apic)
---- ----- ---------            clear_cpu_cap(c, X86_FEATURE_APIC);
          +         }
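The family/model/stepping math in early_identify_cpu() follows the usual CPUID leaf 1 EAX encoding: the extended family is added only when the base family is 0xf, and the extended model is prepended for family >= 6. A self-contained sketch decoding an assumed raw value:

#include <stdio.h>

struct fms { unsigned int family, model, stepping; };

static struct fms decode_fms(unsigned int tfms)
{
        struct fms r;

        r.family   = (tfms >> 8) & 0xf;
        r.model    = (tfms >> 4) & 0xf;
        r.stepping = tfms & 0xf;

        if (r.family == 0xf)
                r.family += (tfms >> 20) & 0xff;      /* extended family */
        if (r.family >= 0x6)
                r.model += ((tfms >> 16) & 0xf) << 4; /* extended model  */
        return r;
}

int main(void)
{
        unsigned int tfms = 0x00100f42;  /* assumed raw CPUID(1).EAX value */
        struct fms r = decode_fms(tfms);

        printf("family 0x%x, model 0x%x, stepping 0x%x\n",
               r.family, r.model, r.stepping);
        return 0;
}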
          +         
          +         /*
          +          * This does the hard work of actually picking apart the CPU stuff...
          +          */
          +         static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
          +         {
          +             int i;
          +         
          +             early_identify_cpu(c);
          +         
          +             init_scattered_cpuid_features(c);
          +         
          +             c->apicid = phys_pkg_id(0);
          +         
          +             /*
          +              * Vendor-specific initialization.  In this section we
          +              * canonicalize the feature flags, meaning that if there are
          +              * features a certain CPU supports which CPUID doesn't tell
          +              * us about, CPUID claims incorrect flags, or other bugs exist,
          +              * we handle them here.
          +              *
          +              * At the end of this section, c->x86_capability better
          +              * indicate the features this CPU genuinely supports!
          +              */
          +             if (this_cpu->c_init)
          +                     this_cpu->c_init(c);
          +         
          +             detect_ht(c);
          +         
          +             /*
          +              * On SMP, boot_cpu_data holds the common feature set between
          +              * all CPUs; so make sure that we indicate which features are
          +              * common between the CPUs.  The first time this routine gets
          +              * executed, c == &boot_cpu_data.
          +              */
          +             if (c != &boot_cpu_data) {
          +                     /* AND the already accumulated flags with these */
          +                     for (i = 0; i < NCAPINTS; i++)
          +                             boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
          +             }
          +         
          +             /* Clear all flags overridden by options */
          +             for (i = 0; i < NCAPINTS; i++)
          +                     c->x86_capability[i] &= ~cleared_cpu_caps[i];
          +         
          +         #ifdef CONFIG_X86_MCE
          +             mcheck_init(c);
          +         #endif
          +             select_idle_routine(c);
          +         
          +         #ifdef CONFIG_NUMA
          +             numa_add_cpu(smp_processor_id());
          +         #endif
          +         
          +         }
          +         
          +         void __cpuinit identify_boot_cpu(void)
          +         {
          +             identify_cpu(&boot_cpu_data);
          +         }
          +         
          +         void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
          +         {
          +             BUG_ON(c == &boot_cpu_data);
          +             identify_cpu(c);
          +             mtrr_ap_init();
          +         }
          +         
          +         static __init int setup_noclflush(char *arg)
          +         {
          +             setup_clear_cpu_cap(X86_FEATURE_CLFLSH);
          +             return 1;
          +         }
          +         __setup("noclflush", setup_noclflush);
          +         
          +         void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
          +         {
          +             if (c->x86_model_id[0])
          +                     printk(KERN_CONT "%s", c->x86_model_id);
          +         
          +             if (c->x86_mask || c->cpuid_level >= 0)
          +                     printk(KERN_CONT " stepping %02x\n", c->x86_mask);
          +             else
          +                     printk(KERN_CONT "\n");
          +         }
          +         
          +         static __init int setup_disablecpuid(char *arg)
          +         {
          +             int bit;
          +             if (get_option(&arg, &bit) && bit < NCAPINTS*32)
          +                     setup_clear_cpu_cap(bit);
          +             else
          +                     return 0;
          +             return 1;
          +         }
          +         __setup("clearcpuid=", setup_disablecpuid);
          +         
          +         cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
          +         
          +         struct x8664_pda **_cpu_pda __read_mostly;
          +         EXPORT_SYMBOL(_cpu_pda);
          +         
          +         struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
          +         
          +         char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss;
          +         
          +         unsigned long __supported_pte_mask __read_mostly = ~0UL;
          +         EXPORT_SYMBOL_GPL(__supported_pte_mask);
          +         
          +         static int do_not_nx __cpuinitdata;
          +         
          +         /* noexec=on|off
          +         Control non-executable mappings for 64-bit processes.
          +         
          +         on  Enable (default)
          +         off Disable
          +         */
          +         static int __init nonx_setup(char *str)
          +         {
          +             if (!str)
          +                     return -EINVAL;
          +             if (!strncmp(str, "on", 2)) {
          +                     __supported_pte_mask |= _PAGE_NX;
          +                     do_not_nx = 0;
          +             } else if (!strncmp(str, "off", 3)) {
          +                     do_not_nx = 1;
          +                     __supported_pte_mask &= ~_PAGE_NX;
          +             }
          +             return 0;
          +         }
          +         early_param("noexec", nonx_setup);
          +         
          +         int force_personality32;
          +         
          +         /* noexec32=on|off
          +         Control non-executable heap for 32-bit processes.
          +         To control the stack too, use noexec=off
          +         
          +         on  PROT_READ does not imply PROT_EXEC for 32-bit processes (default)
          +         off PROT_READ implies PROT_EXEC
          +         */
          +         static int __init nonx32_setup(char *str)
          +         {
          +             if (!strcmp(str, "on"))
          +                     force_personality32 &= ~READ_IMPLIES_EXEC;
          +             else if (!strcmp(str, "off"))
          +                     force_personality32 |= READ_IMPLIES_EXEC;
          +             return 1;
          +         }
          +         __setup("noexec32=", nonx32_setup);
          +         
          +         void pda_init(int cpu)
          +         {
          +             struct x8664_pda *pda = cpu_pda(cpu);
          +         
          +             /* Set up data that may be needed by __get_free_pages early */
          +             loadsegment(fs, 0);
          +             loadsegment(gs, 0);
          +             /* Memory clobbers used to order PDA accesses */
          +             mb();
          +             wrmsrl(MSR_GS_BASE, pda);
          +             mb();
          +         
          +             pda->cpunumber = cpu;
          +             pda->irqcount = -1;
          +             pda->kernelstack = (unsigned long)stack_thread_info() -
          +                                      PDA_STACKOFFSET + THREAD_SIZE;
          +             pda->active_mm = &init_mm;
          +             pda->mmu_state = 0;
          +         
          +             if (cpu == 0) {
          +                     /* others are initialized in smpboot.c */
          +                     pda->pcurrent = &init_task;
          +                     pda->irqstackptr = boot_cpu_stack;
          +             } else {
          +                     pda->irqstackptr = (char *)
          +                             __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
          +                     if (!pda->irqstackptr)
          +                             panic("cannot allocate irqstack for cpu %d", cpu);
          +         
          +                     if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
          +                             pda->nodenumber = cpu_to_node(cpu);
          +             }
          +         
          +             pda->irqstackptr += IRQSTACKSIZE-64;
          +         }
          +         
          +         char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
---------- --------                        DEBUG_STKSZ]
---------- -------- __attribute__((section(".bss.page_aligned")));
+++++++++++++++++++                        DEBUG_STKSZ] __page_aligned_bss;
          +         
          +         extern asmlinkage void ignore_sysret(void);
          +         
          +         /* May not be marked __init: used by software suspend */
          +         void syscall_init(void)
          +         {
          +             /*
          +              * LSTAR and STAR live in a somewhat strange symbiosis.
          +              * They both write to the same internal register. STAR allows setting
          +              * CS/DS, but only a 32-bit target; LSTAR sets the 64-bit rip.
          +              */
          +             wrmsrl(MSR_STAR,  ((u64)__USER32_CS)<<48  | ((u64)__KERNEL_CS)<<32);
          +             wrmsrl(MSR_LSTAR, system_call);
          +             wrmsrl(MSR_CSTAR, ignore_sysret);
          +         
          +         #ifdef CONFIG_IA32_EMULATION
          +             syscall32_cpu_init();
          +         #endif
          +         
          +             /* Flags to clear on syscall */
          +             wrmsrl(MSR_SYSCALL_MASK,
          +                    X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL);
          +         }
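The MSR_STAR write above packs two segment selector bases into one 64-bit register: bits 63:48 hold the base used on SYSRET (compat user CS), bits 47:32 the base used on SYSCALL (kernel CS). A tiny sketch of that packing with assumed selector values (the real ones come from the GDT layout):

#include <stdio.h>
#include <stdint.h>

static uint64_t make_star(uint16_t user32_cs, uint16_t kernel_cs)
{
        return ((uint64_t)user32_cs << 48) | ((uint64_t)kernel_cs << 32);
}

int main(void)
{
        /* assumed selector values, for illustration only */
        uint16_t user32_cs = 0x23;
        uint16_t kernel_cs = 0x10;
        uint64_t star = make_star(user32_cs, kernel_cs);

        printf("MSR_STAR = %#018llx\n", (unsigned long long)star);
        printf("  SYSRET base:  %#06x\n", (unsigned int)(star >> 48));
        printf("  SYSCALL base: %#06x\n", (unsigned int)((star >> 32) & 0xffff));
        return 0;
}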
          +         
          +         void __cpuinit check_efer(void)
          +         {
          +             unsigned long efer;
          +         
          +             rdmsrl(MSR_EFER, efer);
          +             if (!(efer & EFER_NX) || do_not_nx)
          +                     __supported_pte_mask &= ~_PAGE_NX;
          +         }
          +         
          +         unsigned long kernel_eflags;
          +         
          +         /*
          +          * Copies of the original ist values from the tss are only accessed during
          +          * debugging; no special alignment is required.
          +          */
          +         DEFINE_PER_CPU(struct orig_ist, orig_ist);
          +         
          +         /*
          +          * cpu_init() initializes state that is per-CPU. Some data is already
          +          * initialized (naturally) in the bootstrap process, such as the GDT
          +          * and IDT. We reload them nevertheless; this function acts as a
          +          * 'CPU state barrier': nothing should get across.
          +          * A lot of state is already set up in PDA init.
          +          */
          +         void __cpuinit cpu_init(void)
          +         {
          +             int cpu = stack_smp_processor_id();
          +             struct tss_struct *t = &per_cpu(init_tss, cpu);
          +             struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
          +             unsigned long v;
          +             char *estacks = NULL;
          +             struct task_struct *me;
          +             int i;
          +         
          +             /* CPU 0 is initialised in head64.c */
          +             if (cpu != 0)
          +                     pda_init(cpu);
          +             else
          +                     estacks = boot_exception_stacks;
          +         
          +             me = current;
          +         
          +             if (cpu_test_and_set(cpu, cpu_initialized))
          +                     panic("CPU#%d already initialized!\n", cpu);
          +         
          +             printk(KERN_INFO "Initializing CPU#%d\n", cpu);
          +         
          +             clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
          +         
          +             /*
          +              * Initialize the per-CPU GDT with the boot GDT,
          +              * and set up the GDT descriptor:
          +              */
          +         
          +             switch_to_new_gdt();
          +             load_idt((const struct desc_ptr *)&idt_descr);
          +         
          +             memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
          +             syscall_init();
          +         
          +             wrmsrl(MSR_FS_BASE, 0);
          +             wrmsrl(MSR_KERNEL_GS_BASE, 0);
          +             barrier();
          +         
          +             check_efer();
          +         
          +             /*
          +              * set up and load the per-CPU TSS
          +              */
          +             for (v = 0; v < N_EXCEPTION_STACKS; v++) {
          +                     static const unsigned int order[N_EXCEPTION_STACKS] = {
          +                             [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
          +                             [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
          +                     };
          +                     if (cpu) {
          +                             estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
          +                             if (!estacks)
          +                                     panic("Cannot allocate exception stack %ld %d\n",
          +                                           v, cpu);
          +                     }
          +                     estacks += PAGE_SIZE << order[v];
          +                     orig_ist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks;
          +             }
          +         
          +             t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
          +             /*
          +              * <= is required because the CPU will access up to
          +              * 8 bits beyond the end of the IO permission bitmap.
          +              */
          +             for (i = 0; i <= IO_BITMAP_LONGS; i++)
          +                     t->io_bitmap[i] = ~0UL;
          +         
          +             atomic_inc(&init_mm.mm_count);
          +             me->active_mm = &init_mm;
          +             if (me->mm)
          +                     BUG();
          +             enter_lazy_tlb(&init_mm, me);
          +         
          +             load_sp0(t, &current->thread);
          +             set_tss_desc(cpu, t);
          +             load_TR_desc();
          +             load_LDT(&init_mm.context);
          +         
          +         #ifdef CONFIG_KGDB
          +             /*
          +              * If the kgdb is connected no debug regs should be altered.  This
          +              * is only applicable when KGDB and a KGDB I/O module are built
          +              * into the kernel and you are using early debugging with
          +              * kgdbwait. KGDB will control the kernel HW breakpoint registers.
          +              */
          +             if (kgdb_connected && arch_kgdb_ops.correct_hw_break)
          +                     arch_kgdb_ops.correct_hw_break();
          +             else {
          +         #endif
          +             /*
          +              * Clear all 6 debug registers:
          +              */
          +         
          +             set_debugreg(0UL, 0);
          +             set_debugreg(0UL, 1);
          +             set_debugreg(0UL, 2);
          +             set_debugreg(0UL, 3);
          +             set_debugreg(0UL, 6);
          +             set_debugreg(0UL, 7);
          +         #ifdef CONFIG_KGDB
          +             /* If the kgdb is connected no debug regs should be altered. */
          +             }
          +         #endif
          +         
          +             fpu_init();
          +         
          +             raw_local_save_flags(kernel_eflags);
          +         
          +             if (is_uv_system())
          +                     uv_cpu_init();
          +         }
Simple merge
                    #include <asm/percpu.h>
                    #include <asm/dwarf2.h>
                    #include <asm/processor-flags.h>
          -         #include "irq_vectors.h"
       +  +     +   #include <asm/ftrace.h>
          +         #include <asm/irq_vectors.h>
                    
                    /*
                     * We use macros for low-level operations which need to be overridden
                    ENTRY(xen_sysenter_target)
                        RING0_INT_FRAME
                        addl $5*4, %esp         /* remove xen-provided frame */
++++++++++++++++++ +    CFI_ADJUST_CFA_OFFSET -5*4
                        jmp sysenter_past_esp
       +  +             CFI_ENDPROC
                    
                    ENTRY(xen_hypervisor_callback)
                        CFI_STARTPROC
@@@@@@@@@@@@@@@@@@@@@ -349,8 -349,8 -349,8 -349,8 -349,8 -349,8 -349,8 -243,8 -349,8 -349,8 -244,7 -349,8 -349,8 -349,8 -349,8 -349,7 -243,8 -349,8 -349,8 -349,8 +349,7 @@@@@@@@@@@@@@@@@@@@@ ENTRY(system_call_after_swapgs
                        movq  %rcx,RIP-ARGOFFSET(%rsp)
                        CFI_REL_OFFSET rip,RIP-ARGOFFSET
                        GET_THREAD_INFO(%rcx)
---------- ---- ----    testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
---------- ---- ----            TI_flags(%rcx)
          -             testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
+++++++++++++++ ++++    testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
                        jnz tracesys
                        cmpq $__NR_syscall_max,%rax
                        ja badsys
index ec024b3,ec024b3,384b49f,ec024b3,ec024b3,ec024b3,716b892,8dfe9db,ec024b3,ec024b3,0000000,e0b44b7,716b892,ec024b3,ec024b3,ec024b3,716b892,ec024b3,ec024b3,ec024b3..ac6d512
mode 100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,000000,100644,100644,100644,100644,100644,100644,100644,100644,100644..100644
--- /dev/null
          +         /*
          +          *  NMI watchdog support on APIC systems
          +          *
          +          *  Started by Ingo Molnar <mingo@redhat.com>
          +          *
          +          *  Fixes:
          +          *  Mikael Pettersson       : AMD K7 support for local APIC NMI watchdog.
          +          *  Mikael Pettersson       : Power Management for local APIC NMI watchdog.
          +          *  Mikael Pettersson       : Pentium 4 support for local APIC NMI watchdog.
          +          *  Pavel Machek and
          +          *  Mikael Pettersson       : PM converted to driver model. Disable/enable API.
          +          */
          +         
          +         #include <asm/apic.h>
          +         
          +         #include <linux/nmi.h>
          +         #include <linux/mm.h>
          +         #include <linux/delay.h>
          +         #include <linux/interrupt.h>
          +         #include <linux/module.h>
          +         #include <linux/sysdev.h>
          +         #include <linux/sysctl.h>
          +         #include <linux/percpu.h>
          +         #include <linux/kprobes.h>
          +         #include <linux/cpumask.h>
          +         #include <linux/kernel_stat.h>
          +         #include <linux/kdebug.h>
          +         #include <linux/smp.h>
          +         
          +         #include <asm/i8259.h>
          +         #include <asm/io_apic.h>
          +         #include <asm/smp.h>
          +         #include <asm/nmi.h>
          +         #include <asm/proto.h>
          +         #include <asm/timer.h>
          +         
          +         #include <asm/mce.h>
          +         
          +         #include <mach_traps.h>
          +         
          +         int unknown_nmi_panic;
          +         int nmi_watchdog_enabled;
          +         
          +         static cpumask_t backtrace_mask = CPU_MASK_NONE;
          +         
          +         /* nmi_active:
          +          * >0: the lapic NMI watchdog is active, but can be disabled
          +          * <0: the lapic NMI watchdog has not been set up, and cannot
          +          *     be enabled
          +          *  0: the lapic NMI watchdog is disabled, but can be enabled
          +          */
          +         atomic_t nmi_active = ATOMIC_INIT(0);               /* oprofile uses this */
          +         EXPORT_SYMBOL(nmi_active);
          +         
          +         unsigned int nmi_watchdog = NMI_NONE;
          +         EXPORT_SYMBOL(nmi_watchdog);
          +         
          +         static int panic_on_timeout;
          +         
          +         static unsigned int nmi_hz = HZ;
          +         static DEFINE_PER_CPU(short, wd_enabled);
          +         static int endflag __initdata;
          +         
          +         static inline unsigned int get_nmi_count(int cpu)
          +         {
          +         #ifdef CONFIG_X86_64
          +             return cpu_pda(cpu)->__nmi_count;
          +         #else
          +             return nmi_count(cpu);
          +         #endif
          +         }
          +         
          +         static inline int mce_in_progress(void)
          +         {
          +         #if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
          +             return atomic_read(&mce_entry) > 0;
          +         #endif
          +             return 0;
          +         }
          +         
          +         /*
          +          * Take the local apic timer and PIT/HPET into account. We don't
          +          * know which one is active when we have highres/dyntick on
          +          */
          +         static inline unsigned int get_timer_irqs(int cpu)
          +         {
          +         #ifdef CONFIG_X86_64
          +             return read_pda(apic_timer_irqs) + read_pda(irq0_irqs);
          +         #else
          +             return per_cpu(irq_stat, cpu).apic_timer_irqs +
          +                     per_cpu(irq_stat, cpu).irq0_irqs;
          +         #endif
          +         }
          +         
          +         #ifdef CONFIG_SMP
          +         /*
          +          * The performance counters used by NMI_LOCAL_APIC don't trigger when
          +          * the CPU is idle. To make sure the NMI watchdog really ticks on all
          +          * CPUs during the test, make them busy.
          +          */
          +         static __init void nmi_cpu_busy(void *data)
          +         {
          +             local_irq_enable_in_hardirq();
          +             /*
          +              * Intentionally don't use cpu_relax here. This is
          +              * to make sure that the performance counter really ticks,
          +              * even if there is a simulator or similar that catches the
          +              * pause instruction. On a real HT machine this is fine because
          +              * all other CPUs are busy with "useless" delay loops and don't
          +              * care if they get somewhat fewer cycles.
          +              */
          +             while (endflag == 0)
          +                     mb();
          +         }
          +         #endif
          +         
          +         int __init check_nmi_watchdog(void)
          +         {
          +             unsigned int *prev_nmi_count;
          +             int cpu;
          +         
          +             if (!nmi_watchdog_active() || !atomic_read(&nmi_active))
          +                     return 0;
          +         
          +             prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
          +             if (!prev_nmi_count)
          +                     goto error;
          +         
          +             printk(KERN_INFO "Testing NMI watchdog ... ");
          +         
          +         #ifdef CONFIG_SMP
          +             if (nmi_watchdog == NMI_LOCAL_APIC)
      --    -   -               smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);
      ++  + +   +               smp_call_function(nmi_cpu_busy, (void *)&endflag, 0);
          +         #endif
          +         
          +             for_each_possible_cpu(cpu)
          +                     prev_nmi_count[cpu] = get_nmi_count(cpu);
          +             local_irq_enable();
          +             mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */
          +         
          +             for_each_online_cpu(cpu) {
          +                     if (!per_cpu(wd_enabled, cpu))
          +                             continue;
          +                     if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
          +                             printk(KERN_WARNING "WARNING: CPU#%d: NMI "
          +                                     "appears to be stuck (%d->%d)!\n",
          +                                     cpu,
          +                                     prev_nmi_count[cpu],
          +                                     get_nmi_count(cpu));
          +                             per_cpu(wd_enabled, cpu) = 0;
          +                             atomic_dec(&nmi_active);
          +                     }
          +             }
          +             endflag = 1;
          +             if (!atomic_read(&nmi_active)) {
          +                     kfree(prev_nmi_count);
          +                     atomic_set(&nmi_active, -1);
          +                     goto error;
          +             }
          +             printk("OK.\n");
          +         
          +             /*
          +              * now that we know it works we can reduce NMI frequency to
          +              * something more reasonable; makes a difference in some configs
          +              */
          +             if (nmi_watchdog == NMI_LOCAL_APIC)
          +                     nmi_hz = lapic_adjust_nmi_hz(1);
          +         
          +             kfree(prev_nmi_count);
          +             return 0;
          +         error:
          +             if (nmi_watchdog == NMI_IO_APIC && !timer_through_8259)
          +                     disable_8259A_irq(0);
       +  +         #ifdef CONFIG_X86_32
       +  +             timer_ack = 0;
       +  +         #endif
          +             return -1;
          +         }
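
[Editorial note] The core of check_nmi_watchdog() is a simple delta check: sample each CPU's NMI count, wait roughly 20 watchdog periods, and treat a CPU whose count grew by five or fewer as stuck; stuck CPUs are disabled, and the test fails if none remain. A minimal user-space sketch of just that comparison, with made-up before/after samples standing in for the real per-CPU counters:

#include <stdio.h>

#define NR_CPUS 4

/* Hypothetical NMI counts sampled before and after the ~20-tick wait. */
static unsigned int prev_nmi_count[NR_CPUS] = { 10, 10, 10, 10 };
static unsigned int curr_nmi_count[NR_CPUS] = { 35, 12, 40, 10 };
static int wd_enabled[NR_CPUS] = { 1, 1, 1, 1 };

int main(void)
{
	int alive = 0;

	for (int cpu = 0; cpu < NR_CPUS; cpu++) {
		if (!wd_enabled[cpu])
			continue;
		/* same heuristic as above: <= 5 extra NMIs over ~20 ticks means stuck */
		if (curr_nmi_count[cpu] - prev_nmi_count[cpu] <= 5) {
			printf("CPU#%d: NMI appears to be stuck (%u->%u)!\n",
			       cpu, prev_nmi_count[cpu], curr_nmi_count[cpu]);
			wd_enabled[cpu] = 0;
		} else {
			alive++;
		}
	}

	puts(alive ? "OK." : "watchdog test failed on every CPU");
	return 0;
}
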
          +         
          +         static int __init setup_nmi_watchdog(char *str)
          +         {
          +             unsigned int nmi;
          +         
          +             if (!strncmp(str, "panic", 5)) {
          +                     panic_on_timeout = 1;
          +                     str = strchr(str, ',');
          +                     if (!str)
          +                             return 1;
          +                     ++str;
          +             }
          +         
          +             get_option(&str, &nmi);
          +         
          +             if (nmi >= NMI_INVALID)
          +                     return 0;
          +         
          +             nmi_watchdog = nmi;
          +             return 1;
          +         }
          +         __setup("nmi_watchdog=", setup_nmi_watchdog);
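
[Editorial note] setup_nmi_watchdog() accepts an optional leading "panic" flag followed by a numeric mode, so boot options look like nmi_watchdog=2 or nmi_watchdog=panic,1. A stand-alone sketch of the same parsing, using strtoul in place of the kernel's get_option() and an assumed value for NMI_INVALID:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define NMI_INVALID 3	/* assumed here: first out-of-range mode value */

static int panic_on_timeout;
static unsigned int nmi_watchdog;

/* Parse "panic", "N" or "panic,N", mirroring the __setup handler above. */
static int parse_nmi_watchdog(const char *str)
{
	if (!strncmp(str, "panic", 5)) {
		panic_on_timeout = 1;
		str = strchr(str, ',');
		if (!str)
			return 1;
		++str;
	}

	unsigned int nmi = (unsigned int)strtoul(str, NULL, 0);
	if (nmi >= NMI_INVALID)
		return 0;

	nmi_watchdog = nmi;
	return 1;
}

int main(void)
{
	parse_nmi_watchdog("panic,1");
	printf("nmi_watchdog=%u panic_on_timeout=%d\n",
	       nmi_watchdog, panic_on_timeout);
	return 0;
}
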
          +         
          +         /*
          +          * Suspend/resume support
          +          */
          +         #ifdef CONFIG_PM
          +         
          +         static int nmi_pm_active; /* nmi_active before suspend */
          +         
          +         static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
          +         {
          +             /* only CPU0 goes here, other CPUs should be offline */
          +             nmi_pm_active = atomic_read(&nmi_active);
          +             stop_apic_nmi_watchdog(NULL);
          +             BUG_ON(atomic_read(&nmi_active) != 0);
          +             return 0;
          +         }
          +         
          +         static int lapic_nmi_resume(struct sys_device *dev)
          +         {
          +             /* only CPU0 goes here, other CPUs should be offline */
          +             if (nmi_pm_active > 0) {
          +                     setup_apic_nmi_watchdog(NULL);
          +                     touch_nmi_watchdog();
          +             }
          +             return 0;
          +         }
          +         
          +         static struct sysdev_class nmi_sysclass = {
          +             .name           = "lapic_nmi",
          +             .resume         = lapic_nmi_resume,
          +             .suspend        = lapic_nmi_suspend,
          +         };
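
[Editorial note] The PM hooks record whether the watchdog was armed at suspend and re-arm it on resume only if it had been active; the sysdev class above just wires those callbacks to CPU0's lapic_nmi device. A toy sketch of that save/restore pattern (stub start/stop functions, not the real kernel API):

#include <stdio.h>

static int nmi_active = 1;	/* assumed: watchdog currently armed */
static int nmi_pm_active;	/* value saved across suspend */

static void stop_watchdog(void)  { nmi_active = 0; puts("watchdog stopped");   }
static void start_watchdog(void) { nmi_active = 1; puts("watchdog restarted"); }

static void lapic_nmi_suspend(void)
{
	nmi_pm_active = nmi_active;	/* remember whether it was running */
	stop_watchdog();
}

static void lapic_nmi_resume(void)
{
	if (nmi_pm_active > 0)		/* re-arm only if it was active before */
		start_watchdog();
}

int main(void)
{
	lapic_nmi_suspend();
	lapic_nmi_resume();
	return 0;
}
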
          +         
          +         static struct sys_device device_lapic_nmi = {
          +             .id     = 0,
           +             .cls    = &nmi_sysclass,