Merge branch 'linus' into core/iommu
Ingo Molnar [Sun, 7 Jun 2009 09:34:59 +0000 (11:34 +0200)]
Merge reason: This branch was on an -rc5 base so pull almost-2.6.30
              to resync with the latest upstream fixes and make sure
              the combination works fine.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

13 files changed:
Documentation/kernel-parameters.txt
arch/x86/Kconfig.debug
arch/x86/include/asm/amd_iommu.h
arch/x86/include/asm/amd_iommu_types.h
arch/x86/kernel/amd_iommu.c
arch/x86/kernel/amd_iommu_init.c
arch/x86/kernel/pci-calgary_64.c
arch/x86/kernel/pci-gart_64.c
arch/x86/kernel/pci-swiotlb.c
include/linux/dma-debug.h
include/linux/swiotlb.h
lib/dma-debug.c
lib/swiotlb.c

index fd5cac0..e2999cb 100644 (file)
@@ -329,11 +329,6 @@ and is between 256 and 4096 characters. It is defined in the file
                                    flushed before they will be reused, which
                                    is a lot of faster
 
-       amd_iommu_size= [HW,X86-64]
-                       Define the size of the aperture for the AMD IOMMU
-                       driver. Possible values are:
-                       '32M', '64M' (default), '128M', '256M', '512M', '1G'
-
        amijoy.map=     [HW,JOY] Amiga joystick support
                        Map of devices attached to JOY0DAT and JOY1DAT
                        Format: <a>,<b>
index d8359e7..33fac6b 100644 (file)
@@ -159,10 +159,17 @@ config IOMMU_DEBUG
          options. See Documentation/x86_64/boot-options.txt for more
          details.
 
+config IOMMU_STRESS
+       bool "Enable IOMMU stress-test mode"
+       ---help---
+         This option disables various optimizations in IOMMU related
+         code to do real stress testing of the IOMMU code. This option
+         will cause a performance drop and should only be enabled for
+         testing.
+
 config IOMMU_LEAK
        bool "IOMMU leak tracing"
-       depends on DEBUG_KERNEL
-       depends on IOMMU_DEBUG
+       depends on IOMMU_DEBUG && DMA_API_DEBUG
        ---help---
          Add a simple leak tracer to the IOMMU code. This is useful when you
          are debugging a buggy device driver that leaks IOMMU mappings.
index f712344..262e028 100644 (file)
@@ -27,6 +27,8 @@ extern int amd_iommu_init(void);
 extern int amd_iommu_init_dma_ops(void);
 extern void amd_iommu_detect(void);
 extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
+extern void amd_iommu_flush_all_domains(void);
+extern void amd_iommu_flush_all_devices(void);
 #else
 static inline int amd_iommu_init(void) { return -ENODEV; }
 static inline void amd_iommu_detect(void) { }
index 95c8cd9..0c878ca 100644 (file)
 #define PD_DMA_OPS_MASK                (1UL << 0) /* domain used for dma_ops */
 #define PD_DEFAULT_MASK                (1UL << 1) /* domain is a default dma_ops
                                              domain for an IOMMU */
+extern bool amd_iommu_dump;
+#define DUMP_printk(format, arg...)                                    \
+       do {                                                            \
+               if (amd_iommu_dump)                                             \
+                       printk(KERN_INFO "AMD IOMMU: " format, ## arg); \
+       } while(0);
+
+/*
+ * Make iterating over all IOMMUs easier
+ */
+#define for_each_iommu(iommu) \
+       list_for_each_entry((iommu), &amd_iommu_list, list)
+#define for_each_iommu_safe(iommu, next) \
+       list_for_each_entry_safe((iommu), (next), &amd_iommu_list, list)
+
+#define APERTURE_RANGE_SHIFT   27      /* 128 MB */
+#define APERTURE_RANGE_SIZE    (1ULL << APERTURE_RANGE_SHIFT)
+#define APERTURE_RANGE_PAGES   (APERTURE_RANGE_SIZE >> PAGE_SHIFT)
+#define APERTURE_MAX_RANGES    32      /* allows 4GB of DMA address space */
+#define APERTURE_RANGE_INDEX(a)        ((a) >> APERTURE_RANGE_SHIFT)
+#define APERTURE_PAGE_INDEX(a) (((a) >> 21) & 0x3fULL)
 
 /*
  * This structure contains generic data for  IOMMU protection domains
@@ -210,6 +231,26 @@ struct protection_domain {
 };
 
 /*
+ * For dynamic growth the aperture size is split into ranges of 128MB of
+ * DMA address space each. This struct represents one such range.
+ */
+struct aperture_range {
+
+       /* address allocation bitmap */
+       unsigned long *bitmap;
+
+       /*
+        * Array of PTE pages for the aperture. In this array we save all the
+        * leaf pages of the domain page table used for the aperture. This way
+        * we don't need to walk the page table to find a specific PTE. We can
+        * just calculate its address in constant time.
+        */
+       u64 *pte_pages[64];
+
+       unsigned long offset;
+};
+
+/*
  * Data container for a dma_ops specific protection domain
  */
 struct dma_ops_domain {
@@ -222,18 +263,10 @@ struct dma_ops_domain {
        unsigned long aperture_size;
 
        /* address we start to search for free addresses */
-       unsigned long next_bit;
-
-       /* address allocation bitmap */
-       unsigned long *bitmap;
+       unsigned long next_address;
 
-       /*
-        * Array of PTE pages for the aperture. In this array we save all the
-        * leaf pages of the domain page table used for the aperture. This way
-        * we don't need to walk the page table to find a specific PTE. We can
-        * just calculate its address in constant time.
-        */
-       u64 **pte_pages;
+       /* address space relevant data */
+       struct aperture_range *aperture[APERTURE_MAX_RANGES];
 
        /* This will be set to true when TLB needs to be flushed */
        bool need_flush;
index a97db99..2c63d87 100644 (file)
@@ -55,7 +55,16 @@ struct iommu_cmd {
 static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
                             struct unity_map_entry *e);
 static struct dma_ops_domain *find_protection_domain(u16 devid);
+static u64* alloc_pte(struct protection_domain *dom,
+                     unsigned long address, u64
+                     **pte_page, gfp_t gfp);
+static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
+                                     unsigned long start_page,
+                                     unsigned int pages);
 
+#ifndef BUS_NOTIFY_UNBOUND_DRIVER
+#define BUS_NOTIFY_UNBOUND_DRIVER 0x0005
+#endif
 
 #ifdef CONFIG_AMD_IOMMU_STATS
 
@@ -213,7 +222,7 @@ irqreturn_t amd_iommu_int_handler(int irq, void *data)
 {
        struct amd_iommu *iommu;
 
-       list_for_each_entry(iommu, &amd_iommu_list, list)
+       for_each_iommu(iommu)
                iommu_poll_events(iommu);
 
        return IRQ_HANDLED;
@@ -440,7 +449,7 @@ static void iommu_flush_domain(u16 domid)
        __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
                                      domid, 1, 1);
 
-       list_for_each_entry(iommu, &amd_iommu_list, list) {
+       for_each_iommu(iommu) {
                spin_lock_irqsave(&iommu->lock, flags);
                __iommu_queue_command(iommu, &cmd);
                __iommu_completion_wait(iommu);
@@ -449,6 +458,35 @@ static void iommu_flush_domain(u16 domid)
        }
 }
 
+void amd_iommu_flush_all_domains(void)
+{
+       int i;
+
+       for (i = 1; i < MAX_DOMAIN_ID; ++i) {
+               if (!test_bit(i, amd_iommu_pd_alloc_bitmap))
+                       continue;
+               iommu_flush_domain(i);
+       }
+}
+
+void amd_iommu_flush_all_devices(void)
+{
+       struct amd_iommu *iommu;
+       int i;
+
+       for (i = 0; i <= amd_iommu_last_bdf; ++i) {
+               if (amd_iommu_pd_table[i] == NULL)
+                       continue;
+
+               iommu = amd_iommu_rlookup_table[i];
+               if (!iommu)
+                       continue;
+
+               iommu_queue_inv_dev_entry(iommu, i);
+               iommu_completion_wait(iommu);
+       }
+}
+
 /****************************************************************************
  *
  * The functions below are used the create the page table mappings for
@@ -468,7 +506,7 @@ static int iommu_map_page(struct protection_domain *dom,
                          unsigned long phys_addr,
                          int prot)
 {
-       u64 __pte, *pte, *page;
+       u64 __pte, *pte;
 
        bus_addr  = PAGE_ALIGN(bus_addr);
        phys_addr = PAGE_ALIGN(phys_addr);
@@ -477,27 +515,7 @@ static int iommu_map_page(struct protection_domain *dom,
        if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK))
                return -EINVAL;
 
-       pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)];
-
-       if (!IOMMU_PTE_PRESENT(*pte)) {
-               page = (u64 *)get_zeroed_page(GFP_KERNEL);
-               if (!page)
-                       return -ENOMEM;
-               *pte = IOMMU_L2_PDE(virt_to_phys(page));
-       }
-
-       pte = IOMMU_PTE_PAGE(*pte);
-       pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];
-
-       if (!IOMMU_PTE_PRESENT(*pte)) {
-               page = (u64 *)get_zeroed_page(GFP_KERNEL);
-               if (!page)
-                       return -ENOMEM;
-               *pte = IOMMU_L1_PDE(virt_to_phys(page));
-       }
-
-       pte = IOMMU_PTE_PAGE(*pte);
-       pte = &pte[IOMMU_PTE_L0_INDEX(bus_addr)];
+       pte = alloc_pte(dom, bus_addr, NULL, GFP_KERNEL);
 
        if (IOMMU_PTE_PRESENT(*pte))
                return -EBUSY;
@@ -595,7 +613,8 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
                 * as allocated in the aperture
                 */
                if (addr < dma_dom->aperture_size)
-                       __set_bit(addr >> PAGE_SHIFT, dma_dom->bitmap);
+                       __set_bit(addr >> PAGE_SHIFT,
+                                 dma_dom->aperture[0]->bitmap);
        }
 
        return 0;
@@ -632,42 +651,191 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
  ****************************************************************************/
 
 /*
- * The address allocator core function.
+ * The address allocator core functions.
  *
  * called with domain->lock held
  */
+
+/*
+ * This function checks if there is a PTE for a given dma address. If
+ * there is one, it returns the pointer to it.
+ */
+static u64* fetch_pte(struct protection_domain *domain,
+                     unsigned long address)
+{
+       u64 *pte;
+
+       pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(address)];
+
+       if (!IOMMU_PTE_PRESENT(*pte))
+               return NULL;
+
+       pte = IOMMU_PTE_PAGE(*pte);
+       pte = &pte[IOMMU_PTE_L1_INDEX(address)];
+
+       if (!IOMMU_PTE_PRESENT(*pte))
+               return NULL;
+
+       pte = IOMMU_PTE_PAGE(*pte);
+       pte = &pte[IOMMU_PTE_L0_INDEX(address)];
+
+       return pte;
+}
+
+/*
+ * This function is used to add a new aperture range to an existing
+ * aperture in case of dma_ops domain allocation or address allocation
+ * failure.
+ */
+static int alloc_new_range(struct amd_iommu *iommu,
+                          struct dma_ops_domain *dma_dom,
+                          bool populate, gfp_t gfp)
+{
+       int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT;
+       int i;
+
+#ifdef CONFIG_IOMMU_STRESS
+       populate = false;
+#endif
+
+       if (index >= APERTURE_MAX_RANGES)
+               return -ENOMEM;
+
+       dma_dom->aperture[index] = kzalloc(sizeof(struct aperture_range), gfp);
+       if (!dma_dom->aperture[index])
+               return -ENOMEM;
+
+       dma_dom->aperture[index]->bitmap = (void *)get_zeroed_page(gfp);
+       if (!dma_dom->aperture[index]->bitmap)
+               goto out_free;
+
+       dma_dom->aperture[index]->offset = dma_dom->aperture_size;
+
+       if (populate) {
+               unsigned long address = dma_dom->aperture_size;
+               int i, num_ptes = APERTURE_RANGE_PAGES / 512;
+               u64 *pte, *pte_page;
+
+               for (i = 0; i < num_ptes; ++i) {
+                       pte = alloc_pte(&dma_dom->domain, address,
+                                       &pte_page, gfp);
+                       if (!pte)
+                               goto out_free;
+
+                       dma_dom->aperture[index]->pte_pages[i] = pte_page;
+
+                       address += APERTURE_RANGE_SIZE / 64;
+               }
+       }
+
+       dma_dom->aperture_size += APERTURE_RANGE_SIZE;
+
+       /* Intialize the exclusion range if necessary */
+       if (iommu->exclusion_start &&
+           iommu->exclusion_start >= dma_dom->aperture[index]->offset &&
+           iommu->exclusion_start < dma_dom->aperture_size) {
+               unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT;
+               int pages = iommu_num_pages(iommu->exclusion_start,
+                                           iommu->exclusion_length,
+                                           PAGE_SIZE);
+               dma_ops_reserve_addresses(dma_dom, startpage, pages);
+       }
+
+       /*
+        * Check for areas already mapped as present in the new aperture
+        * range and mark those pages as reserved in the allocator. Such
+        * mappings may already exist as a result of requested unity
+        * mappings for devices.
+        */
+       for (i = dma_dom->aperture[index]->offset;
+            i < dma_dom->aperture_size;
+            i += PAGE_SIZE) {
+               u64 *pte = fetch_pte(&dma_dom->domain, i);
+               if (!pte || !IOMMU_PTE_PRESENT(*pte))
+                       continue;
+
+               dma_ops_reserve_addresses(dma_dom, i << PAGE_SHIFT, 1);
+       }
+
+       return 0;
+
+out_free:
+       free_page((unsigned long)dma_dom->aperture[index]->bitmap);
+
+       kfree(dma_dom->aperture[index]);
+       dma_dom->aperture[index] = NULL;
+
+       return -ENOMEM;
+}
+
+static unsigned long dma_ops_area_alloc(struct device *dev,
+                                       struct dma_ops_domain *dom,
+                                       unsigned int pages,
+                                       unsigned long align_mask,
+                                       u64 dma_mask,
+                                       unsigned long start)
+{
+       unsigned long next_bit = dom->next_address % APERTURE_RANGE_SIZE;
+       int max_index = dom->aperture_size >> APERTURE_RANGE_SHIFT;
+       int i = start >> APERTURE_RANGE_SHIFT;
+       unsigned long boundary_size;
+       unsigned long address = -1;
+       unsigned long limit;
+
+       next_bit >>= PAGE_SHIFT;
+
+       boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
+                       PAGE_SIZE) >> PAGE_SHIFT;
+
+       for (;i < max_index; ++i) {
+               unsigned long offset = dom->aperture[i]->offset >> PAGE_SHIFT;
+
+               if (dom->aperture[i]->offset >= dma_mask)
+                       break;
+
+               limit = iommu_device_max_index(APERTURE_RANGE_PAGES, offset,
+                                              dma_mask >> PAGE_SHIFT);
+
+               address = iommu_area_alloc(dom->aperture[i]->bitmap,
+                                          limit, next_bit, pages, 0,
+                                           boundary_size, align_mask);
+               if (address != -1) {
+                       address = dom->aperture[i]->offset +
+                                 (address << PAGE_SHIFT);
+                       dom->next_address = address + (pages << PAGE_SHIFT);
+                       break;
+               }
+
+               next_bit = 0;
+       }
+
+       return address;
+}
+
 static unsigned long dma_ops_alloc_addresses(struct device *dev,
                                             struct dma_ops_domain *dom,
                                             unsigned int pages,
                                             unsigned long align_mask,
                                             u64 dma_mask)
 {
-       unsigned long limit;
        unsigned long address;
-       unsigned long boundary_size;
 
-       boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
-                       PAGE_SIZE) >> PAGE_SHIFT;
-       limit = iommu_device_max_index(dom->aperture_size >> PAGE_SHIFT, 0,
-                                      dma_mask >> PAGE_SHIFT);
+#ifdef CONFIG_IOMMU_STRESS
+       dom->next_address = 0;
+       dom->need_flush = true;
+#endif
 
-       if (dom->next_bit >= limit) {
-               dom->next_bit = 0;
-               dom->need_flush = true;
-       }
+       address = dma_ops_area_alloc(dev, dom, pages, align_mask,
+                                    dma_mask, dom->next_address);
 
-       address = iommu_area_alloc(dom->bitmap, limit, dom->next_bit, pages,
-                                  0 , boundary_size, align_mask);
        if (address == -1) {
-               address = iommu_area_alloc(dom->bitmap, limit, 0, pages,
-                               0, boundary_size, align_mask);
+               dom->next_address = 0;
+               address = dma_ops_area_alloc(dev, dom, pages, align_mask,
+                                            dma_mask, 0);
                dom->need_flush = true;
        }
 
-       if (likely(address != -1)) {
-               dom->next_bit = address + pages;
-               address <<= PAGE_SHIFT;
-       } else
+       if (unlikely(address == -1))
                address = bad_dma_address;
 
        WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size);
@@ -684,11 +852,23 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom,
                                   unsigned long address,
                                   unsigned int pages)
 {
-       address >>= PAGE_SHIFT;
-       iommu_area_free(dom->bitmap, address, pages);
+       unsigned i = address >> APERTURE_RANGE_SHIFT;
+       struct aperture_range *range = dom->aperture[i];
+
+       BUG_ON(i >= APERTURE_MAX_RANGES || range == NULL);
+
+#ifdef CONFIG_IOMMU_STRESS
+       if (i < 4)
+               return;
+#endif
 
-       if (address >= dom->next_bit)
+       if (address >= dom->next_address)
                dom->need_flush = true;
+
+       address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT;
+
+       iommu_area_free(range->bitmap, address, pages);
+
 }
 
 /****************************************************************************
@@ -736,12 +916,16 @@ static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
                                      unsigned long start_page,
                                      unsigned int pages)
 {
-       unsigned int last_page = dom->aperture_size >> PAGE_SHIFT;
+       unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT;
 
        if (start_page + pages > last_page)
                pages = last_page - start_page;
 
-       iommu_area_reserve(dom->bitmap, start_page, pages);
+       for (i = start_page; i < start_page + pages; ++i) {
+               int index = i / APERTURE_RANGE_PAGES;
+               int page  = i % APERTURE_RANGE_PAGES;
+               __set_bit(page, dom->aperture[index]->bitmap);
+       }
 }
 
 static void free_pagetable(struct protection_domain *domain)
@@ -780,14 +964,19 @@ static void free_pagetable(struct protection_domain *domain)
  */
 static void dma_ops_domain_free(struct dma_ops_domain *dom)
 {
+       int i;
+
        if (!dom)
                return;
 
        free_pagetable(&dom->domain);
 
-       kfree(dom->pte_pages);
-
-       kfree(dom->bitmap);
+       for (i = 0; i < APERTURE_MAX_RANGES; ++i) {
+               if (!dom->aperture[i])
+                       continue;
+               free_page((unsigned long)dom->aperture[i]->bitmap);
+               kfree(dom->aperture[i]);
+       }
 
        kfree(dom);
 }
@@ -797,19 +986,9 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
  * It also intializes the page table and the address allocator data
  * structures required for the dma_ops interface
  */
-static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
-                                                  unsigned order)
+static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu)
 {
        struct dma_ops_domain *dma_dom;
-       unsigned i, num_pte_pages;
-       u64 *l2_pde;
-       u64 address;
-
-       /*
-        * Currently the DMA aperture must be between 32 MB and 1GB in size
-        */
-       if ((order < 25) || (order > 30))
-               return NULL;
 
        dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL);
        if (!dma_dom)
@@ -826,55 +1005,20 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
        dma_dom->domain.priv = dma_dom;
        if (!dma_dom->domain.pt_root)
                goto free_dma_dom;
-       dma_dom->aperture_size = (1ULL << order);
-       dma_dom->bitmap = kzalloc(dma_dom->aperture_size / (PAGE_SIZE * 8),
-                                 GFP_KERNEL);
-       if (!dma_dom->bitmap)
-               goto free_dma_dom;
-       /*
-        * mark the first page as allocated so we never return 0 as
-        * a valid dma-address. So we can use 0 as error value
-        */
-       dma_dom->bitmap[0] = 1;
-       dma_dom->next_bit = 0;
 
        dma_dom->need_flush = false;
        dma_dom->target_dev = 0xffff;
 
-       /* Intialize the exclusion range if necessary */
-       if (iommu->exclusion_start &&
-           iommu->exclusion_start < dma_dom->aperture_size) {
-               unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT;
-               int pages = iommu_num_pages(iommu->exclusion_start,
-                                           iommu->exclusion_length,
-                                           PAGE_SIZE);
-               dma_ops_reserve_addresses(dma_dom, startpage, pages);
-       }
+       if (alloc_new_range(iommu, dma_dom, true, GFP_KERNEL))
+               goto free_dma_dom;
 
        /*
-        * At the last step, build the page tables so we don't need to
-        * allocate page table pages in the dma_ops mapping/unmapping
-        * path.
+        * mark the first page as allocated so we never return 0 as
+        * a valid dma-address. So we can use 0 as error value
         */
-       num_pte_pages = dma_dom->aperture_size / (PAGE_SIZE * 512);
-       dma_dom->pte_pages = kzalloc(num_pte_pages * sizeof(void *),
-                       GFP_KERNEL);
-       if (!dma_dom->pte_pages)
-               goto free_dma_dom;
-
-       l2_pde = (u64 *)get_zeroed_page(GFP_KERNEL);
-       if (l2_pde == NULL)
-               goto free_dma_dom;
+       dma_dom->aperture[0]->bitmap[0] = 1;
+       dma_dom->next_address = 0;
 
-       dma_dom->domain.pt_root[0] = IOMMU_L2_PDE(virt_to_phys(l2_pde));
-
-       for (i = 0; i < num_pte_pages; ++i) {
-               dma_dom->pte_pages[i] = (u64 *)get_zeroed_page(GFP_KERNEL);
-               if (!dma_dom->pte_pages[i])
-                       goto free_dma_dom;
-               address = virt_to_phys(dma_dom->pte_pages[i]);
-               l2_pde[i] = IOMMU_L1_PDE(address);
-       }
 
        return dma_dom;
 
@@ -983,7 +1127,6 @@ static int device_change_notifier(struct notifier_block *nb,
        struct protection_domain *domain;
        struct dma_ops_domain *dma_domain;
        struct amd_iommu *iommu;
-       int order = amd_iommu_aperture_order;
        unsigned long flags;
 
        if (devid > amd_iommu_last_bdf)
@@ -1009,10 +1152,11 @@ static int device_change_notifier(struct notifier_block *nb,
                if (!dma_domain)
                        dma_domain = iommu->default_dom;
                attach_device(iommu, &dma_domain->domain, devid);
-               printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
-                      "device %s\n", dma_domain->domain.id, dev_name(dev));
+               DUMP_printk(KERN_INFO "AMD IOMMU: Using protection domain "
+                           "%d for device %s\n",
+                           dma_domain->domain.id, dev_name(dev));
                break;
-       case BUS_NOTIFY_UNBIND_DRIVER:
+       case BUS_NOTIFY_UNBOUND_DRIVER:
                if (!domain)
                        goto out;
                detach_device(domain, devid);
@@ -1022,7 +1166,7 @@ static int device_change_notifier(struct notifier_block *nb,
                dma_domain = find_protection_domain(devid);
                if (dma_domain)
                        goto out;
-               dma_domain = dma_ops_domain_alloc(iommu, order);
+               dma_domain = dma_ops_domain_alloc(iommu);
                if (!dma_domain)
                        goto out;
                dma_domain->target_dev = devid;
@@ -1133,8 +1277,9 @@ static int get_device_resources(struct device *dev,
                        dma_dom = (*iommu)->default_dom;
                *domain = &dma_dom->domain;
                attach_device(*iommu, *domain, *bdf);
-               printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
-                               "device %s\n", (*domain)->id, dev_name(dev));
+               DUMP_printk(KERN_INFO "AMD IOMMU: Using protection domain "
+                               "%d for device %s\n",
+                               (*domain)->id, dev_name(dev));
        }
 
        if (domain_for_device(_bdf) == NULL)
@@ -1144,6 +1289,66 @@ static int get_device_resources(struct device *dev,
 }
 
 /*
+ * If the pte_page is not yet allocated this function is called
+ */
+static u64* alloc_pte(struct protection_domain *dom,
+                     unsigned long address, u64 **pte_page, gfp_t gfp)
+{
+       u64 *pte, *page;
+
+       pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(address)];
+
+       if (!IOMMU_PTE_PRESENT(*pte)) {
+               page = (u64 *)get_zeroed_page(gfp);
+               if (!page)
+                       return NULL;
+               *pte = IOMMU_L2_PDE(virt_to_phys(page));
+       }
+
+       pte = IOMMU_PTE_PAGE(*pte);
+       pte = &pte[IOMMU_PTE_L1_INDEX(address)];
+
+       if (!IOMMU_PTE_PRESENT(*pte)) {
+               page = (u64 *)get_zeroed_page(gfp);
+               if (!page)
+                       return NULL;
+               *pte = IOMMU_L1_PDE(virt_to_phys(page));
+       }
+
+       pte = IOMMU_PTE_PAGE(*pte);
+
+       if (pte_page)
+               *pte_page = pte;
+
+       pte = &pte[IOMMU_PTE_L0_INDEX(address)];
+
+       return pte;
+}
+
+/*
+ * This function fetches the PTE for a given address in the aperture
+ */
+static u64* dma_ops_get_pte(struct dma_ops_domain *dom,
+                           unsigned long address)
+{
+       struct aperture_range *aperture;
+       u64 *pte, *pte_page;
+
+       aperture = dom->aperture[APERTURE_RANGE_INDEX(address)];
+       if (!aperture)
+               return NULL;
+
+       pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)];
+       if (!pte) {
+               pte = alloc_pte(&dom->domain, address, &pte_page, GFP_ATOMIC);
+               aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page;
+       } else
+               pte += IOMMU_PTE_L0_INDEX(address);
+
+       return pte;
+}
+
+/*
  * This is the generic map function. It maps one 4kb page at paddr to
  * the given address in the DMA address space for the domain.
  */
@@ -1159,8 +1364,9 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu,
 
        paddr &= PAGE_MASK;
 
-       pte  = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)];
-       pte += IOMMU_PTE_L0_INDEX(address);
+       pte  = dma_ops_get_pte(dom, address);
+       if (!pte)
+               return bad_dma_address;
 
        __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC;
 
@@ -1185,14 +1391,20 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu,
                                 struct dma_ops_domain *dom,
                                 unsigned long address)
 {
+       struct aperture_range *aperture;
        u64 *pte;
 
        if (address >= dom->aperture_size)
                return;
 
-       WARN_ON(address & ~PAGE_MASK || address >= dom->aperture_size);
+       aperture = dom->aperture[APERTURE_RANGE_INDEX(address)];
+       if (!aperture)
+               return;
+
+       pte  = aperture->pte_pages[APERTURE_PAGE_INDEX(address)];
+       if (!pte)
+               return;
 
-       pte  = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)];
        pte += IOMMU_PTE_L0_INDEX(address);
 
        WARN_ON(!*pte);
@@ -1216,7 +1428,7 @@ static dma_addr_t __map_single(struct device *dev,
                               u64 dma_mask)
 {
        dma_addr_t offset = paddr & ~PAGE_MASK;
-       dma_addr_t address, start;
+       dma_addr_t address, start, ret;
        unsigned int pages;
        unsigned long align_mask = 0;
        int i;
@@ -1232,14 +1444,33 @@ static dma_addr_t __map_single(struct device *dev,
        if (align)
                align_mask = (1UL << get_order(size)) - 1;
 
+retry:
        address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask,
                                          dma_mask);
-       if (unlikely(address == bad_dma_address))
-               goto out;
+       if (unlikely(address == bad_dma_address)) {
+               /*
+                * setting next_address here will let the address
+                * allocator only scan the new allocated range in the
+                * first run. This is a small optimization.
+                */
+               dma_dom->next_address = dma_dom->aperture_size;
+
+               if (alloc_new_range(iommu, dma_dom, false, GFP_ATOMIC))
+                       goto out;
+
+               /*
+                * aperture was sucessfully enlarged by 128 MB, try
+                * allocation again
+                */
+               goto retry;
+       }
 
        start = address;
        for (i = 0; i < pages; ++i) {
-               dma_ops_domain_map(iommu, dma_dom, start, paddr, dir);
+               ret = dma_ops_domain_map(iommu, dma_dom, start, paddr, dir);
+               if (ret == bad_dma_address)
+                       goto out_unmap;
+
                paddr += PAGE_SIZE;
                start += PAGE_SIZE;
        }
@@ -1255,6 +1486,17 @@ static dma_addr_t __map_single(struct device *dev,
 
 out:
        return address;
+
+out_unmap:
+
+       for (--i; i >= 0; --i) {
+               start -= PAGE_SIZE;
+               dma_ops_domain_unmap(iommu, dma_dom, start);
+       }
+
+       dma_ops_free_addresses(dma_dom, address, pages);
+
+       return bad_dma_address;
 }
 
 /*
@@ -1625,7 +1867,6 @@ static void prealloc_protection_domains(void)
        struct pci_dev *dev = NULL;
        struct dma_ops_domain *dma_dom;
        struct amd_iommu *iommu;
-       int order = amd_iommu_aperture_order;
        u16 devid;
 
        while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
@@ -1638,7 +1879,7 @@ static void prealloc_protection_domains(void)
                iommu = amd_iommu_rlookup_table[devid];
                if (!iommu)
                        continue;
-               dma_dom = dma_ops_domain_alloc(iommu, order);
+               dma_dom = dma_ops_domain_alloc(iommu);
                if (!dma_dom)
                        continue;
                init_unity_mappings_for_device(dma_dom, devid);
@@ -1664,7 +1905,6 @@ static struct dma_map_ops amd_iommu_dma_ops = {
 int __init amd_iommu_init_dma_ops(void)
 {
        struct amd_iommu *iommu;
-       int order = amd_iommu_aperture_order;
        int ret;
 
        /*
@@ -1672,8 +1912,8 @@ int __init amd_iommu_init_dma_ops(void)
         * found in the system. Devices not assigned to any other
         * protection domain will be assigned to the default one.
         */
-       list_for_each_entry(iommu, &amd_iommu_list, list) {
-               iommu->default_dom = dma_ops_domain_alloc(iommu, order);
+       for_each_iommu(iommu) {
+               iommu->default_dom = dma_ops_domain_alloc(iommu);
                if (iommu->default_dom == NULL)
                        return -ENOMEM;
                iommu->default_dom->domain.flags |= PD_DEFAULT_MASK;
@@ -1710,7 +1950,7 @@ int __init amd_iommu_init_dma_ops(void)
 
 free_domains:
 
-       list_for_each_entry(iommu, &amd_iommu_list, list) {
+       for_each_iommu(iommu) {
                if (iommu->default_dom)
                        dma_ops_domain_free(iommu->default_dom);
        }
index 8c0be09..238989e 100644 (file)
@@ -115,15 +115,21 @@ struct ivmd_header {
        u64 range_length;
 } __attribute__((packed));
 
+bool amd_iommu_dump;
+
 static int __initdata amd_iommu_detected;
 
 u16 amd_iommu_last_bdf;                        /* largest PCI device id we have
                                           to handle */
 LIST_HEAD(amd_iommu_unity_map);                /* a list of required unity mappings
                                           we find in ACPI */
-unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */
+#ifdef CONFIG_IOMMU_STRESS
+bool amd_iommu_isolate = false;
+#else
 bool amd_iommu_isolate = true;         /* if true, device isolation is
                                           enabled */
+#endif
+
 bool amd_iommu_unmap_flush;            /* if true, flush on every unmap */
 
 LIST_HEAD(amd_iommu_list);             /* list of all AMD IOMMUs in the
@@ -175,7 +181,7 @@ static inline void update_last_devid(u16 devid)
 static inline unsigned long tbl_size(int entry_size)
 {
        unsigned shift = PAGE_SHIFT +
-                        get_order(amd_iommu_last_bdf * entry_size);
+                        get_order(((int)amd_iommu_last_bdf + 1) * entry_size);
 
        return 1UL << shift;
 }
@@ -193,7 +199,7 @@ static inline unsigned long tbl_size(int entry_size)
  * This function set the exclusion range in the IOMMU. DMA accesses to the
  * exclusion range are passed through untranslated
  */
-static void __init iommu_set_exclusion_range(struct amd_iommu *iommu)
+static void iommu_set_exclusion_range(struct amd_iommu *iommu)
 {
        u64 start = iommu->exclusion_start & PAGE_MASK;
        u64 limit = (start + iommu->exclusion_length) & PAGE_MASK;
@@ -225,7 +231,7 @@ static void __init iommu_set_device_table(struct amd_iommu *iommu)
 }
 
 /* Generic functions to enable/disable certain features of the IOMMU. */
-static void __init iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
+static void iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
 {
        u32 ctrl;
 
@@ -244,7 +250,7 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
 }
 
 /* Function to enable the hardware */
-static void __init iommu_enable(struct amd_iommu *iommu)
+static void iommu_enable(struct amd_iommu *iommu)
 {
        printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at %s cap 0x%hx\n",
               dev_name(&iommu->dev->dev), iommu->cap_ptr);
@@ -252,11 +258,9 @@ static void __init iommu_enable(struct amd_iommu *iommu)
        iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
 }
 
-/* Function to enable IOMMU event logging and event interrupts */
-static void __init iommu_enable_event_logging(struct amd_iommu *iommu)
+static void iommu_disable(struct amd_iommu *iommu)
 {
-       iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
-       iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
+       iommu_feature_disable(iommu, CONTROL_IOMMU_EN);
 }
 
 /*
@@ -413,25 +417,36 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu)
 {
        u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
                        get_order(CMD_BUFFER_SIZE));
-       u64 entry;
 
        if (cmd_buf == NULL)
                return NULL;
 
        iommu->cmd_buf_size = CMD_BUFFER_SIZE;
 
-       entry = (u64)virt_to_phys(cmd_buf);
+       return cmd_buf;
+}
+
+/*
+ * This function writes the command buffer address to the hardware and
+ * enables it.
+ */
+static void iommu_enable_command_buffer(struct amd_iommu *iommu)
+{
+       u64 entry;
+
+       BUG_ON(iommu->cmd_buf == NULL);
+
+       entry = (u64)virt_to_phys(iommu->cmd_buf);
        entry |= MMIO_CMD_SIZE_512;
+
        memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
-                       &entry, sizeof(entry));
+                   &entry, sizeof(entry));
 
        /* set head and tail to zero manually */
        writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
        writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
 
        iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
-
-       return cmd_buf;
 }
 
 static void __init free_command_buffer(struct amd_iommu *iommu)
@@ -443,20 +458,27 @@ static void __init free_command_buffer(struct amd_iommu *iommu)
 /* allocates the memory where the IOMMU will log its events to */
 static u8 * __init alloc_event_buffer(struct amd_iommu *iommu)
 {
-       u64 entry;
        iommu->evt_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
                                                get_order(EVT_BUFFER_SIZE));
 
        if (iommu->evt_buf == NULL)
                return NULL;
 
+       return iommu->evt_buf;
+}
+
+static void iommu_enable_event_buffer(struct amd_iommu *iommu)
+{
+       u64 entry;
+
+       BUG_ON(iommu->evt_buf == NULL);
+
        entry = (u64)virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK;
+
        memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET,
                    &entry, sizeof(entry));
 
-       iommu->evt_buf_size = EVT_BUFFER_SIZE;
-
-       return iommu->evt_buf;
+       iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
 }
 
 static void __init free_event_buffer(struct amd_iommu *iommu)
@@ -596,32 +618,83 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
        p += sizeof(struct ivhd_header);
        end += h->length;
 
+
        while (p < end) {
                e = (struct ivhd_entry *)p;
                switch (e->type) {
                case IVHD_DEV_ALL:
+
+                       DUMP_printk("  DEV_ALL\t\t\t first devid: %02x:%02x.%x"
+                                   " last device %02x:%02x.%x flags: %02x\n",
+                                   PCI_BUS(iommu->first_device),
+                                   PCI_SLOT(iommu->first_device),
+                                   PCI_FUNC(iommu->first_device),
+                                   PCI_BUS(iommu->last_device),
+                                   PCI_SLOT(iommu->last_device),
+                                   PCI_FUNC(iommu->last_device),
+                                   e->flags);
+
                        for (dev_i = iommu->first_device;
                                        dev_i <= iommu->last_device; ++dev_i)
                                set_dev_entry_from_acpi(iommu, dev_i,
                                                        e->flags, 0);
                        break;
                case IVHD_DEV_SELECT:
+
+                       DUMP_printk("  DEV_SELECT\t\t\t devid: %02x:%02x.%x "
+                                   "flags: %02x\n",
+                                   PCI_BUS(e->devid),
+                                   PCI_SLOT(e->devid),
+                                   PCI_FUNC(e->devid),
+                                   e->flags);
+
                        devid = e->devid;
                        set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
                        break;
                case IVHD_DEV_SELECT_RANGE_START:
+
+                       DUMP_printk("  DEV_SELECT_RANGE_START\t "
+                                   "devid: %02x:%02x.%x flags: %02x\n",
+                                   PCI_BUS(e->devid),
+                                   PCI_SLOT(e->devid),
+                                   PCI_FUNC(e->devid),
+                                   e->flags);
+
                        devid_start = e->devid;
                        flags = e->flags;
                        ext_flags = 0;
                        alias = false;
                        break;
                case IVHD_DEV_ALIAS:
+
+                       DUMP_printk("  DEV_ALIAS\t\t\t devid: %02x:%02x.%x "
+                                   "flags: %02x devid_to: %02x:%02x.%x\n",
+                                   PCI_BUS(e->devid),
+                                   PCI_SLOT(e->devid),
+                                   PCI_FUNC(e->devid),
+                                   e->flags,
+                                   PCI_BUS(e->ext >> 8),
+                                   PCI_SLOT(e->ext >> 8),
+                                   PCI_FUNC(e->ext >> 8));
+
                        devid = e->devid;
                        devid_to = e->ext >> 8;
-                       set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
+                       set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0);
                        amd_iommu_alias_table[devid] = devid_to;
                        break;
                case IVHD_DEV_ALIAS_RANGE:
+
+                       DUMP_printk("  DEV_ALIAS_RANGE\t\t "
+                                   "devid: %02x:%02x.%x flags: %02x "
+                                   "devid_to: %02x:%02x.%x\n",
+                                   PCI_BUS(e->devid),
+                                   PCI_SLOT(e->devid),
+                                   PCI_FUNC(e->devid),
+                                   e->flags,
+                                   PCI_BUS(e->ext >> 8),
+                                   PCI_SLOT(e->ext >> 8),
+                                   PCI_FUNC(e->ext >> 8));
+
                        devid_start = e->devid;
                        flags = e->flags;
                        devid_to = e->ext >> 8;
@@ -629,17 +702,39 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
                        alias = true;
                        break;
                case IVHD_DEV_EXT_SELECT:
+
+                       DUMP_printk("  DEV_EXT_SELECT\t\t devid: %02x:%02x.%x "
+                                   "flags: %02x ext: %08x\n",
+                                   PCI_BUS(e->devid),
+                                   PCI_SLOT(e->devid),
+                                   PCI_FUNC(e->devid),
+                                   e->flags, e->ext);
+
                        devid = e->devid;
                        set_dev_entry_from_acpi(iommu, devid, e->flags,
                                                e->ext);
                        break;
                case IVHD_DEV_EXT_SELECT_RANGE:
+
+                       DUMP_printk("  DEV_EXT_SELECT_RANGE\t devid: "
+                                   "%02x:%02x.%x flags: %02x ext: %08x\n",
+                                   PCI_BUS(e->devid),
+                                   PCI_SLOT(e->devid),
+                                   PCI_FUNC(e->devid),
+                                   e->flags, e->ext);
+
                        devid_start = e->devid;
                        flags = e->flags;
                        ext_flags = e->ext;
                        alias = false;
                        break;
                case IVHD_DEV_RANGE_END:
+
+                       DUMP_printk("  DEV_RANGE_END\t\t devid: %02x:%02x.%x\n",
+                                   PCI_BUS(e->devid),
+                                   PCI_SLOT(e->devid),
+                                   PCI_FUNC(e->devid));
+
                        devid = e->devid;
                        for (dev_i = devid_start; dev_i <= devid; ++dev_i) {
                                if (alias)
@@ -679,7 +774,7 @@ static void __init free_iommu_all(void)
 {
        struct amd_iommu *iommu, *next;
 
-       list_for_each_entry_safe(iommu, next, &amd_iommu_list, list) {
+       for_each_iommu_safe(iommu, next) {
                list_del(&iommu->list);
                free_iommu_one(iommu);
                kfree(iommu);
@@ -710,7 +805,6 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
        if (!iommu->mmio_base)
                return -ENOMEM;
 
-       iommu_set_device_table(iommu);
        iommu->cmd_buf = alloc_command_buffer(iommu);
        if (!iommu->cmd_buf)
                return -ENOMEM;
@@ -746,6 +840,15 @@ static int __init init_iommu_all(struct acpi_table_header *table)
                h = (struct ivhd_header *)p;
                switch (*p) {
                case ACPI_IVHD_TYPE:
+
+                       DUMP_printk("IOMMU: device: %02x:%02x.%01x cap: %04x "
+                                   "seg: %d flags: %01x info %04x\n",
+                                   PCI_BUS(h->devid), PCI_SLOT(h->devid),
+                                   PCI_FUNC(h->devid), h->cap_ptr,
+                                   h->pci_seg, h->flags, h->info);
+                       DUMP_printk("       mmio-addr: %016llx\n",
+                                   h->mmio_phys);
+
                        iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL);
                        if (iommu == NULL)
                                return -ENOMEM;
@@ -773,56 +876,9 @@ static int __init init_iommu_all(struct acpi_table_header *table)
  *
  ****************************************************************************/
 
-static int __init iommu_setup_msix(struct amd_iommu *iommu)
-{
-       struct amd_iommu *curr;
-       struct msix_entry entries[32]; /* only 32 supported by AMD IOMMU */
-       int nvec = 0, i;
-
-       list_for_each_entry(curr, &amd_iommu_list, list) {
-               if (curr->dev == iommu->dev) {
-                       entries[nvec].entry = curr->evt_msi_num;
-                       entries[nvec].vector = 0;
-                       curr->int_enabled = true;
-                       nvec++;
-               }
-       }
-
-       if (pci_enable_msix(iommu->dev, entries, nvec)) {
-               pci_disable_msix(iommu->dev);
-               return 1;
-       }
-
-       for (i = 0; i < nvec; ++i) {
-               int r = request_irq(entries->vector, amd_iommu_int_handler,
-                                   IRQF_SAMPLE_RANDOM,
-                                   "AMD IOMMU",
-                                   NULL);
-               if (r)
-                       goto out_free;
-       }
-
-       return 0;
-
-out_free:
-       for (i -= 1; i >= 0; --i)
-               free_irq(entries->vector, NULL);
-
-       pci_disable_msix(iommu->dev);
-
-       return 1;
-}
-
 static int __init iommu_setup_msi(struct amd_iommu *iommu)
 {
        int r;
-       struct amd_iommu *curr;
-
-       list_for_each_entry(curr, &amd_iommu_list, list) {
-               if (curr->dev == iommu->dev)
-                       curr->int_enabled = true;
-       }
-
 
        if (pci_enable_msi(iommu->dev))
                return 1;
@@ -837,17 +893,18 @@ static int __init iommu_setup_msi(struct amd_iommu *iommu)
                return 1;
        }
 
+       iommu->int_enabled = true;
+       iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
+
        return 0;
 }
 
-static int __init iommu_init_msi(struct amd_iommu *iommu)
+static int iommu_init_msi(struct amd_iommu *iommu)
 {
        if (iommu->int_enabled)
                return 0;
 
-       if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSIX))
-               return iommu_setup_msix(iommu);
-       else if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI))
+       if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI))
                return iommu_setup_msi(iommu);
 
        return 1;
@@ -899,6 +956,7 @@ static int __init init_exclusion_range(struct ivmd_header *m)
 static int __init init_unity_map_range(struct ivmd_header *m)
 {
        struct unity_map_entry *e = 0;
+       char *s;
 
        e = kzalloc(sizeof(*e), GFP_KERNEL);
        if (e == NULL)
@@ -906,14 +964,19 @@ static int __init init_unity_map_range(struct ivmd_header *m)
 
        switch (m->type) {
        default:
+               kfree(e);
+               return 0;
        case ACPI_IVMD_TYPE:
+               s = "IVMD_TYPEi\t\t\t";
                e->devid_start = e->devid_end = m->devid;
                break;
        case ACPI_IVMD_TYPE_ALL:
+               s = "IVMD_TYPE_ALL\t\t";
                e->devid_start = 0;
                e->devid_end = amd_iommu_last_bdf;
                break;
        case ACPI_IVMD_TYPE_RANGE:
+               s = "IVMD_TYPE_RANGE\t\t";
                e->devid_start = m->devid;
                e->devid_end = m->aux;
                break;
@@ -922,6 +985,13 @@ static int __init init_unity_map_range(struct ivmd_header *m)
        e->address_end = e->address_start + PAGE_ALIGN(m->range_length);
        e->prot = m->flags >> 1;
 
+       DUMP_printk("%s devid_start: %02x:%02x.%x devid_end: %02x:%02x.%x"
+                   " range_start: %016llx range_end: %016llx flags: %x\n", s,
+                   PCI_BUS(e->devid_start), PCI_SLOT(e->devid_start),
+                   PCI_FUNC(e->devid_start), PCI_BUS(e->devid_end),
+                   PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end),
+                   e->address_start, e->address_end, m->flags);
+
        list_add_tail(&e->list, &amd_iommu_unity_map);
 
        return 0;
@@ -967,18 +1037,28 @@ static void init_device_table(void)
  * This function finally enables all IOMMUs found in the system after
  * they have been initialized
  */
-static void __init enable_iommus(void)
+static void enable_iommus(void)
 {
        struct amd_iommu *iommu;
 
-       list_for_each_entry(iommu, &amd_iommu_list, list) {
+       for_each_iommu(iommu) {
+               iommu_set_device_table(iommu);
+               iommu_enable_command_buffer(iommu);
+               iommu_enable_event_buffer(iommu);
                iommu_set_exclusion_range(iommu);
                iommu_init_msi(iommu);
-               iommu_enable_event_logging(iommu);
                iommu_enable(iommu);
        }
 }
 
+static void disable_iommus(void)
+{
+       struct amd_iommu *iommu;
+
+       for_each_iommu(iommu)
+               iommu_disable(iommu);
+}
+
 /*
  * Suspend/Resume support
  * disable suspend until real resume implemented
@@ -986,12 +1066,31 @@ static void __init enable_iommus(void)
 
 static int amd_iommu_resume(struct sys_device *dev)
 {
+       /*
+        * Disable IOMMUs before reprogramming the hardware registers.
+        * IOMMU is still enabled from the resume kernel.
+        */
+       disable_iommus();
+
+       /* re-load the hardware */
+       enable_iommus();
+
+       /*
+        * we have to flush after the IOMMUs are enabled because a
+        * disabled IOMMU will never execute the commands we send
+        */
+       amd_iommu_flush_all_domains();
+       amd_iommu_flush_all_devices();
+
        return 0;
 }
 
 static int amd_iommu_suspend(struct sys_device *dev, pm_message_t state)
 {
-       return -EINVAL;
+       /* disable IOMMUs to go out of the way for BIOS */
+       disable_iommus();
+
+       return 0;
 }
 
 static struct sysdev_class amd_iommu_sysdev_class = {
@@ -1137,9 +1236,6 @@ int __init amd_iommu_init(void)
 
        enable_iommus();
 
-       printk(KERN_INFO "AMD IOMMU: aperture size is %d MB\n",
-                       (1 << (amd_iommu_aperture_order-20)));
-
        printk(KERN_INFO "AMD IOMMU: device isolation ");
        if (amd_iommu_isolate)
                printk("enabled\n");
@@ -1211,6 +1307,13 @@ void __init amd_iommu_detect(void)
  *
  ****************************************************************************/
 
+static int __init parse_amd_iommu_dump(char *str)
+{
+       amd_iommu_dump = true;
+
+       return 1;
+}
+
 static int __init parse_amd_iommu_options(char *str)
 {
        for (; *str; ++str) {
@@ -1225,15 +1328,5 @@ static int __init parse_amd_iommu_options(char *str)
        return 1;
 }
 
-static int __init parse_amd_iommu_size_options(char *str)
-{
-       unsigned order = PAGE_SHIFT + get_order(memparse(str, &str));
-
-       if ((order > 24) && (order < 31))
-               amd_iommu_aperture_order = order;
-
-       return 1;
-}
-
+__setup("amd_iommu_dump", parse_amd_iommu_dump);
 __setup("amd_iommu=", parse_amd_iommu_options);
-__setup("amd_iommu_size=", parse_amd_iommu_size_options);
index 755c21e..971a3be 100644 (file)
@@ -186,37 +186,6 @@ static struct cal_chipset_ops calioc2_chip_ops = {
 
 static struct calgary_bus_info bus_info[MAX_PHB_BUS_NUM] = { { NULL, 0, 0 }, };
 
-/* enable this to stress test the chip's TCE cache */
-#ifdef CONFIG_IOMMU_DEBUG
-static int debugging = 1;
-
-static inline unsigned long verify_bit_range(unsigned long* bitmap,
-       int expected, unsigned long start, unsigned long end)
-{
-       unsigned long idx = start;
-
-       BUG_ON(start >= end);
-
-       while (idx < end) {
-               if (!!test_bit(idx, bitmap) != expected)
-                       return idx;
-               ++idx;
-       }
-
-       /* all bits have the expected value */
-       return ~0UL;
-}
-#else /* debugging is disabled */
-static int debugging;
-
-static inline unsigned long verify_bit_range(unsigned long* bitmap,
-       int expected, unsigned long start, unsigned long end)
-{
-       return ~0UL;
-}
-
-#endif /* CONFIG_IOMMU_DEBUG */
-
 static inline int translation_enabled(struct iommu_table *tbl)
 {
        /* only PHBs with translation enabled have an IOMMU table */
@@ -228,7 +197,6 @@ static void iommu_range_reserve(struct iommu_table *tbl,
 {
        unsigned long index;
        unsigned long end;
-       unsigned long badbit;
        unsigned long flags;
 
        index = start_addr >> PAGE_SHIFT;
@@ -243,14 +211,6 @@ static void iommu_range_reserve(struct iommu_table *tbl,
 
        spin_lock_irqsave(&tbl->it_lock, flags);
 
-       badbit = verify_bit_range(tbl->it_map, 0, index, end);
-       if (badbit != ~0UL) {
-               if (printk_ratelimit())
-                       printk(KERN_ERR "Calgary: entry already allocated at "
-                              "0x%lx tbl %p dma 0x%lx npages %u\n",
-                              badbit, tbl, start_addr, npages);
-       }
-
        iommu_area_reserve(tbl->it_map, index, npages);
 
        spin_unlock_irqrestore(&tbl->it_lock, flags);
@@ -326,7 +286,6 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
        unsigned int npages)
 {
        unsigned long entry;
-       unsigned long badbit;
        unsigned long badend;
        unsigned long flags;
 
@@ -346,14 +305,6 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
 
        spin_lock_irqsave(&tbl->it_lock, flags);
 
-       badbit = verify_bit_range(tbl->it_map, 1, entry, entry + npages);
-       if (badbit != ~0UL) {
-               if (printk_ratelimit())
-                       printk(KERN_ERR "Calgary: bit is off at 0x%lx "
-                              "tbl %p dma 0x%Lx entry 0x%lx npages %u\n",
-                              badbit, tbl, dma_addr, entry, npages);
-       }
-
        iommu_area_free(tbl->it_map, entry, npages);
 
        spin_unlock_irqrestore(&tbl->it_lock, flags);
@@ -1488,9 +1439,8 @@ void __init detect_calgary(void)
                iommu_detected = 1;
                calgary_detected = 1;
                printk(KERN_INFO "PCI-DMA: Calgary IOMMU detected.\n");
-               printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d, "
-                      "CONFIG_IOMMU_DEBUG is %s.\n", specified_table_size,
-                      debugging ? "enabled" : "disabled");
+               printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d\n",
+                      specified_table_size);
 
                /* swiotlb for devices that aren't behind the Calgary. */
                if (max_pfn > MAX_DMA32_PFN)
index b284b58..cfd9f90 100644 (file)
@@ -144,48 +144,21 @@ static void flush_gart(void)
 }
 
 #ifdef CONFIG_IOMMU_LEAK
-
-#define SET_LEAK(x)                                                    \
-       do {                                                            \
-               if (iommu_leak_tab)                                     \
-                       iommu_leak_tab[x] = __builtin_return_address(0);\
-       } while (0)
-
-#define CLEAR_LEAK(x)                                                  \
-       do {                                                            \
-               if (iommu_leak_tab)                                     \
-                       iommu_leak_tab[x] = NULL;                       \
-       } while (0)
-
 /* Debugging aid for drivers that don't free their IOMMU tables */
-static void **iommu_leak_tab;
 static int leak_trace;
 static int iommu_leak_pages = 20;
 
 static void dump_leak(void)
 {
-       int i;
        static int dump;
 
-       if (dump || !iommu_leak_tab)
+       if (dump)
                return;
        dump = 1;
-       show_stack(NULL, NULL);
 
-       /* Very crude. dump some from the end of the table too */
-       printk(KERN_DEBUG "Dumping %d pages from end of IOMMU:\n",
-              iommu_leak_pages);
-       for (i = 0; i < iommu_leak_pages; i += 2) {
-               printk(KERN_DEBUG "%lu: ", iommu_pages-i);
-               printk_address((unsigned long) iommu_leak_tab[iommu_pages-i],
-                               0);
-               printk(KERN_CONT "%c", (i+1)%2 == 0 ? '\n' : ' ');
-       }
-       printk(KERN_DEBUG "\n");
+       show_stack(NULL, NULL);
+       debug_dma_dump_mappings(NULL);
 }
-#else
-# define SET_LEAK(x)
-# define CLEAR_LEAK(x)
 #endif
 
 static void iommu_full(struct device *dev, size_t size, int dir)
@@ -248,7 +221,6 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
 
        for (i = 0; i < npages; i++) {
                iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem);
-               SET_LEAK(iommu_page + i);
                phys_mem += PAGE_SIZE;
        }
        return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK);
@@ -294,7 +266,6 @@ static void gart_unmap_page(struct device *dev, dma_addr_t dma_addr,
        npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
        for (i = 0; i < npages; i++) {
                iommu_gatt_base[iommu_page + i] = gart_unmapped_entry;
-               CLEAR_LEAK(iommu_page + i);
        }
        free_iommu(iommu_page, npages);
 }
@@ -377,7 +348,6 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start,
                pages = iommu_num_pages(s->offset, s->length, PAGE_SIZE);
                while (pages--) {
                        iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr);
-                       SET_LEAK(iommu_page);
                        addr += PAGE_SIZE;
                        iommu_page++;
                }
@@ -688,8 +658,6 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
 
        agp_gatt_table = gatt;
 
-       enable_gart_translations();
-
        error = sysdev_class_register(&gart_sysdev_class);
        if (!error)
                error = sysdev_register(&device_gart);
@@ -801,11 +769,12 @@ void __init gart_iommu_init(void)
 
 #ifdef CONFIG_IOMMU_LEAK
        if (leak_trace) {
-               iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
-                                 get_order(iommu_pages*sizeof(void *)));
-               if (!iommu_leak_tab)
+               int ret;
+
+               ret = dma_debug_resize_entries(iommu_pages);
+               if (ret)
                        printk(KERN_DEBUG
-                              "PCI-DMA: Cannot allocate leak trace area\n");
+                              "PCI-DMA: Cannot trace all the entries\n");
        }
 #endif
 
@@ -845,6 +814,14 @@ void __init gart_iommu_init(void)
         * the pages as Not-Present:
         */
        wbinvd();
+       
+       /*
+        * Now all caches are flushed and we can safely enable
+        * GART hardware.  Doing it early leaves the possibility
+        * of stale cache entries that can lead to GART PTE
+        * errors.
+        */
+       enable_gart_translations();
 
        /*
         * Try to workaround a bug (thanks to BenH):
index 221a385..a1712f2 100644 (file)
@@ -28,7 +28,7 @@ dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr)
        return paddr;
 }
 
-phys_addr_t swiotlb_bus_to_phys(dma_addr_t baddr)
+phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr)
 {
        return baddr;
 }
index 28d53cb..171ad8a 100644 (file)
@@ -32,6 +32,8 @@ extern void dma_debug_add_bus(struct bus_type *bus);
 
 extern void dma_debug_init(u32 num_entries);
 
+extern int dma_debug_resize_entries(u32 num_entries);
+
 extern void debug_dma_map_page(struct device *dev, struct page *page,
                               size_t offset, size_t size,
                               int direction, dma_addr_t dma_addr,
@@ -91,6 +93,11 @@ static inline void dma_debug_init(u32 num_entries)
 {
 }
 
+static inline int dma_debug_resize_entries(u32 num_entries)
+{
+       return 0;
+}
+
 static inline void debug_dma_map_page(struct device *dev, struct page *page,
                                      size_t offset, size_t size,
                                      int direction, dma_addr_t dma_addr,
index ac9ff54..cb1a663 100644 (file)
@@ -29,7 +29,8 @@ extern void *swiotlb_alloc(unsigned order, unsigned long nslabs);
 
 extern dma_addr_t swiotlb_phys_to_bus(struct device *hwdev,
                                      phys_addr_t address);
-extern phys_addr_t swiotlb_bus_to_phys(dma_addr_t address);
+extern phys_addr_t swiotlb_bus_to_phys(struct device *hwdev,
+                                      dma_addr_t address);
 
 extern int swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size);
 
index 69da09a..8fcc09c 100644 (file)
@@ -85,6 +85,7 @@ static u32 show_num_errors = 1;
 
 static u32 num_free_entries;
 static u32 min_free_entries;
+static u32 nr_total_entries;
 
 /* number of preallocated entries requested by kernel cmdline */
 static u32 req_entries;
@@ -185,15 +186,50 @@ static void put_hash_bucket(struct hash_bucket *bucket,
 static struct dma_debug_entry *hash_bucket_find(struct hash_bucket *bucket,
                                                struct dma_debug_entry *ref)
 {
-       struct dma_debug_entry *entry;
+       struct dma_debug_entry *entry, *ret = NULL;
+       int matches = 0, match_lvl, last_lvl = 0;
 
        list_for_each_entry(entry, &bucket->list, list) {
-               if ((entry->dev_addr == ref->dev_addr) &&
-                   (entry->dev == ref->dev))
+               if ((entry->dev_addr != ref->dev_addr) ||
+                   (entry->dev != ref->dev))
+                       continue;
+
+               /*
+                * Some drivers map the same physical address multiple
+                * times. Without a hardware IOMMU this results in the
+                * same device addresses being put into the dma-debug
+                * hash multiple times too. This can result in false
+                * positives being reported. Therfore we implement a
+                * best-fit algorithm here which returns the entry from
+                * the hash which fits best to the reference value
+                * instead of the first-fit.
+                */
+               matches += 1;
+               match_lvl = 0;
+               entry->size      == ref->size      ? ++match_lvl : match_lvl;
+               entry->type      == ref->type      ? ++match_lvl : match_lvl;
+               entry->direction == ref->direction ? ++match_lvl : match_lvl;
+
+               if (match_lvl == 3) {
+                       /* perfect-fit - return the result */
                        return entry;
+               } else if (match_lvl > last_lvl) {
+                       /*
+                        * We found an entry that fits better then the
+                        * previous one
+                        */
+                       last_lvl = match_lvl;
+                       ret      = entry;
+               }
        }
 
-       return NULL;
+       /*
+        * If we have multiple matches but no perfect-fit, just return
+        * NULL.
+        */
+       ret = (matches == 1) ? ret : NULL;
+
+       return ret;
 }
 
 /*
@@ -257,6 +293,21 @@ static void add_dma_entry(struct dma_debug_entry *entry)
        put_hash_bucket(bucket, &flags);
 }
 
+static struct dma_debug_entry *__dma_entry_alloc(void)
+{
+       struct dma_debug_entry *entry;
+
+       entry = list_entry(free_entries.next, struct dma_debug_entry, list);
+       list_del(&entry->list);
+       memset(entry, 0, sizeof(*entry));
+
+       num_free_entries -= 1;
+       if (num_free_entries < min_free_entries)
+               min_free_entries = num_free_entries;
+
+       return entry;
+}
+
 /* struct dma_entry allocator
  *
  * The next two functions implement the allocator for
@@ -276,9 +327,7 @@ static struct dma_debug_entry *dma_entry_alloc(void)
                goto out;
        }
 
-       entry = list_entry(free_entries.next, struct dma_debug_entry, list);
-       list_del(&entry->list);
-       memset(entry, 0, sizeof(*entry));
+       entry = __dma_entry_alloc();
 
 #ifdef CONFIG_STACKTRACE
        entry->stacktrace.max_entries = DMA_DEBUG_STACKTRACE_ENTRIES;
@@ -286,9 +335,6 @@ static struct dma_debug_entry *dma_entry_alloc(void)
        entry->stacktrace.skip = 2;
        save_stack_trace(&entry->stacktrace);
 #endif
-       num_free_entries -= 1;
-       if (num_free_entries < min_free_entries)
-               min_free_entries = num_free_entries;
 
 out:
        spin_unlock_irqrestore(&free_entries_lock, flags);
@@ -310,6 +356,53 @@ static void dma_entry_free(struct dma_debug_entry *entry)
        spin_unlock_irqrestore(&free_entries_lock, flags);
 }
 
+int dma_debug_resize_entries(u32 num_entries)
+{
+       int i, delta, ret = 0;
+       unsigned long flags;
+       struct dma_debug_entry *entry;
+       LIST_HEAD(tmp);
+
+       spin_lock_irqsave(&free_entries_lock, flags);
+
+       if (nr_total_entries < num_entries) {
+               delta = num_entries - nr_total_entries;
+
+               spin_unlock_irqrestore(&free_entries_lock, flags);
+
+               for (i = 0; i < delta; i++) {
+                       entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+                       if (!entry)
+                               break;
+
+                       list_add_tail(&entry->list, &tmp);
+               }
+
+               spin_lock_irqsave(&free_entries_lock, flags);
+
+               list_splice(&tmp, &free_entries);
+               nr_total_entries += i;
+               num_free_entries += i;
+       } else {
+               delta = nr_total_entries - num_entries;
+
+               for (i = 0; i < delta && !list_empty(&free_entries); i++) {
+                       entry = __dma_entry_alloc();
+                       kfree(entry);
+               }
+
+               nr_total_entries -= i;
+       }
+
+       if (nr_total_entries != num_entries)
+               ret = 1;
+
+       spin_unlock_irqrestore(&free_entries_lock, flags);
+
+       return ret;
+}
+EXPORT_SYMBOL(dma_debug_resize_entries);
+
 /*
  * DMA-API debugging init code
  *
@@ -439,6 +532,8 @@ void dma_debug_init(u32 num_entries)
                return;
        }
 
+       nr_total_entries = num_free_entries;
+
        printk(KERN_INFO "DMA-API: debugging enabled by kernel config\n");
 }
 
index 2b0b5a7..bffe6d7 100644 (file)
@@ -60,8 +60,8 @@ enum dma_sync_target {
 int swiotlb_force;
 
 /*
- * Used to do a quick range check in swiotlb_unmap_single and
- * swiotlb_sync_single_*, to see if the memory was in fact allocated by this
+ * Used to do a quick range check in unmap_single and
+ * sync_single_*, to see if the memory was in fact allocated by this
  * API.
  */
 static char *io_tlb_start, *io_tlb_end;
@@ -129,7 +129,7 @@ dma_addr_t __weak swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr)
        return paddr;
 }
 
-phys_addr_t __weak swiotlb_bus_to_phys(dma_addr_t baddr)
+phys_addr_t __weak swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr)
 {
        return baddr;
 }
@@ -140,9 +140,15 @@ static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
        return swiotlb_phys_to_bus(hwdev, virt_to_phys(address));
 }
 
-static void *swiotlb_bus_to_virt(dma_addr_t address)
+void * __weak swiotlb_bus_to_virt(struct device *hwdev, dma_addr_t address)
 {
-       return phys_to_virt(swiotlb_bus_to_phys(address));
+       return phys_to_virt(swiotlb_bus_to_phys(hwdev, address));
+}
+
+int __weak swiotlb_arch_address_needs_mapping(struct device *hwdev,
+                                              dma_addr_t addr, size_t size)
+{
+       return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size);
 }
 
 int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size)
@@ -309,10 +315,10 @@ cleanup1:
        return -ENOMEM;
 }
 
-static int
+static inline int
 address_needs_mapping(struct device *hwdev, dma_addr_t addr, size_t size)
 {
-       return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size);
+       return swiotlb_arch_address_needs_mapping(hwdev, addr, size);
 }
 
 static inline int range_needs_mapping(phys_addr_t paddr, size_t size)
@@ -341,7 +347,7 @@ static void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
                unsigned long flags;
 
                while (size) {
-                       sz = min(PAGE_SIZE - offset, size);
+                       sz = min_t(size_t, PAGE_SIZE - offset, size);
 
                        local_irq_save(flags);
                        buffer = kmap_atomic(pfn_to_page(pfn),
@@ -476,7 +482,7 @@ found:
  * dma_addr is the kernel virtual address of the bounce buffer to unmap.
  */
 static void
-unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
+do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
 {
        unsigned long flags;
        int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
@@ -560,7 +566,6 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
                                   size)) {
                /*
                 * The allocated memory isn't reachable by the device.
-                * Fall back on swiotlb_map_single().
                 */
                free_pages((unsigned long) ret, order);
                ret = NULL;
@@ -568,9 +573,8 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
        if (!ret) {
                /*
                 * We are either out of memory or the device can't DMA
-                * to GFP_DMA memory; fall back on
-                * swiotlb_map_single(), which will grab memory from
-                * the lowest available address range.
+                * to GFP_DMA memory; fall back on map_single(), which
+                * will grab memory from the lowest available address range.
                 */
                ret = map_single(hwdev, 0, size, DMA_FROM_DEVICE);
                if (!ret)
@@ -587,7 +591,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
                       (unsigned long long)dev_addr);
 
                /* DMA_TO_DEVICE to avoid memcpy in unmap_single */
-               unmap_single(hwdev, ret, size, DMA_TO_DEVICE);
+               do_unmap_single(hwdev, ret, size, DMA_TO_DEVICE);
                return NULL;
        }
        *dma_handle = dev_addr;
@@ -604,7 +608,7 @@ swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
                free_pages((unsigned long) vaddr, get_order(size));
        else
                /* DMA_TO_DEVICE to avoid memcpy in unmap_single */
-               unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE);
+               do_unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE);
 }
 EXPORT_SYMBOL(swiotlb_free_coherent);
 
@@ -634,7 +638,7 @@ swiotlb_full(struct device *dev, size_t size, int dir, int do_panic)
  * physical address to use is returned.
  *
  * Once the device is given the dma address, the device owns this memory until
- * either swiotlb_unmap_single or swiotlb_dma_sync_single is performed.
+ * either swiotlb_unmap_page or swiotlb_dma_sync_single is performed.
  */
 dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
                            unsigned long offset, size_t size,
@@ -642,18 +646,17 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
                            struct dma_attrs *attrs)
 {
        phys_addr_t phys = page_to_phys(page) + offset;
-       void *ptr = page_address(page) + offset;
        dma_addr_t dev_addr = swiotlb_phys_to_bus(dev, phys);
        void *map;
 
        BUG_ON(dir == DMA_NONE);
        /*
-        * If the pointer passed in happens to be in the device's DMA window,
+        * If the address happens to be in the device's DMA window,
         * we can safely return the device addr and not worry about bounce
         * buffering it.
         */
        if (!address_needs_mapping(dev, dev_addr, size) &&
-           !range_needs_mapping(virt_to_phys(ptr), size))
+           !range_needs_mapping(phys, size))
                return dev_addr;
 
        /*
@@ -679,23 +682,35 @@ EXPORT_SYMBOL_GPL(swiotlb_map_page);
 
 /*
  * Unmap a single streaming mode DMA translation.  The dma_addr and size must
- * match what was provided for in a previous swiotlb_map_single call.  All
+ * match what was provided for in a previous swiotlb_map_page call.  All
  * other usages are undefined.
  *
  * After this call, reads by the cpu to the buffer are guaranteed to see
  * whatever the device wrote there.
  */
+static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
+                        size_t size, int dir)
+{
+       char *dma_addr = swiotlb_bus_to_virt(hwdev, dev_addr);
+
+       BUG_ON(dir == DMA_NONE);
+
+       if (is_swiotlb_buffer(dma_addr)) {
+               do_unmap_single(hwdev, dma_addr, size, dir);
+               return;
+       }
+
+       if (dir != DMA_FROM_DEVICE)
+               return;
+
+       dma_mark_clean(dma_addr, size);
+}
+
 void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
                        size_t size, enum dma_data_direction dir,
                        struct dma_attrs *attrs)
 {
-       char *dma_addr = swiotlb_bus_to_virt(dev_addr);
-
-       BUG_ON(dir == DMA_NONE);
-       if (is_swiotlb_buffer(dma_addr))
-               unmap_single(hwdev, dma_addr, size, dir);
-       else if (dir == DMA_FROM_DEVICE)
-               dma_mark_clean(dma_addr, size);
+       unmap_single(hwdev, dev_addr, size, dir);
 }
 EXPORT_SYMBOL_GPL(swiotlb_unmap_page);
 
@@ -703,7 +718,7 @@ EXPORT_SYMBOL_GPL(swiotlb_unmap_page);
  * Make physical memory consistent for a single streaming mode DMA translation
  * after a transfer.
  *
- * If you perform a swiotlb_map_single() but wish to interrogate the buffer
+ * If you perform a swiotlb_map_page() but wish to interrogate the buffer
  * using the cpu, yet do not wish to teardown the dma mapping, you must
  * call this function before doing so.  At the next point you give the dma
  * address back to the card, you must first perform a
@@ -713,13 +728,19 @@ static void
 swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
                    size_t size, int dir, int target)
 {
-       char *dma_addr = swiotlb_bus_to_virt(dev_addr);
+       char *dma_addr = swiotlb_bus_to_virt(hwdev, dev_addr);
 
        BUG_ON(dir == DMA_NONE);
-       if (is_swiotlb_buffer(dma_addr))
+
+       if (is_swiotlb_buffer(dma_addr)) {
                sync_single(hwdev, dma_addr, size, dir, target);
-       else if (dir == DMA_FROM_DEVICE)
-               dma_mark_clean(dma_addr, size);
+               return;
+       }
+
+       if (dir != DMA_FROM_DEVICE)
+               return;
+
+       dma_mark_clean(dma_addr, size);
 }
 
 void
@@ -746,13 +767,7 @@ swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr,
                          unsigned long offset, size_t size,
                          int dir, int target)
 {
-       char *dma_addr = swiotlb_bus_to_virt(dev_addr) + offset;
-
-       BUG_ON(dir == DMA_NONE);
-       if (is_swiotlb_buffer(dma_addr))
-               sync_single(hwdev, dma_addr, size, dir, target);
-       else if (dir == DMA_FROM_DEVICE)
-               dma_mark_clean(dma_addr, size);
+       swiotlb_sync_single(hwdev, dev_addr + offset, size, dir, target);
 }
 
 void
@@ -777,7 +792,7 @@ EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device);
 
 /*
  * Map a set of buffers described by scatterlist in streaming mode for DMA.
- * This is the scatter-gather version of the above swiotlb_map_single
+ * This is the scatter-gather version of the above swiotlb_map_page
  * interface.  Here the scatter gather list elements are each tagged with the
  * appropriate dma address and length.  They are obtained via
  * sg_dma_{address,length}(SG).
@@ -788,7 +803,7 @@ EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device);
  *       The routine returns the number of addr/length pairs actually
  *       used, at most nents.
  *
- * Device ownership issues as mentioned above for swiotlb_map_single are the
+ * Device ownership issues as mentioned above for swiotlb_map_page are the
  * same here.
  */
 int
@@ -836,7 +851,7 @@ EXPORT_SYMBOL(swiotlb_map_sg);
 
 /*
  * Unmap a set of streaming mode DMA translations.  Again, cpu read rules
- * concerning calls here are the same as for swiotlb_unmap_single() above.
+ * concerning calls here are the same as for swiotlb_unmap_page() above.
  */
 void
 swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
@@ -847,13 +862,9 @@ swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
 
        BUG_ON(dir == DMA_NONE);
 
-       for_each_sg(sgl, sg, nelems, i) {
-               if (sg->dma_address != swiotlb_phys_to_bus(hwdev, sg_phys(sg)))
-                       unmap_single(hwdev, swiotlb_bus_to_virt(sg->dma_address),
-                                    sg->dma_length, dir);
-               else if (dir == DMA_FROM_DEVICE)
-                       dma_mark_clean(swiotlb_bus_to_virt(sg->dma_address), sg->dma_length);
-       }
+       for_each_sg(sgl, sg, nelems, i)
+               unmap_single(hwdev, sg->dma_address, sg->dma_length, dir);
+
 }
 EXPORT_SYMBOL(swiotlb_unmap_sg_attrs);
 
@@ -879,15 +890,9 @@ swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
        struct scatterlist *sg;
        int i;
 
-       BUG_ON(dir == DMA_NONE);
-
-       for_each_sg(sgl, sg, nelems, i) {
-               if (sg->dma_address != swiotlb_phys_to_bus(hwdev, sg_phys(sg)))
-                       sync_single(hwdev, swiotlb_bus_to_virt(sg->dma_address),
+       for_each_sg(sgl, sg, nelems, i)
+               swiotlb_sync_single(hwdev, sg->dma_address,
                                    sg->dma_length, dir, target);
-               else if (dir == DMA_FROM_DEVICE)
-                       dma_mark_clean(swiotlb_bus_to_virt(sg->dma_address), sg->dma_length);
-       }
 }
 
 void