]> nv-tegra.nvidia Code Review - linux-2.6.git/blobdiff - arch/x86/mm/pageattr.c
Merge branch 'x86/urgent' into x86/xen
[linux-2.6.git] / arch / x86 / mm / pageattr.c
index 84ba74820ad6436479a6456ec8d8e3a9a3c5c591..b81b41a0481f8a42924c78b036ab2630722fefeb 100644 (file)
@@ -16,6 +16,7 @@
 #include <asm/processor.h>
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
+#include <asm/setup.h>
 #include <asm/uaccess.h>
 #include <asm/pgalloc.h>
 #include <asm/proto.h>
@@ -33,6 +34,7 @@ struct cpa_data {
        unsigned long   pfn;
        unsigned        force_split : 1;
        int             curpage;
+       struct page     **pages;
 };
 
 /*
@@ -45,6 +47,7 @@ static DEFINE_SPINLOCK(cpa_lock);
 
 #define CPA_FLUSHTLB 1
 #define CPA_ARRAY 2
+#define CPA_PAGES_ARRAY 4
 
 #ifdef CONFIG_PROC_FS
 static unsigned long direct_pages_count[PG_LEVEL_NUM];
@@ -95,7 +98,7 @@ static inline unsigned long highmap_start_pfn(void)
 
 static inline unsigned long highmap_end_pfn(void)
 {
-       return __pa(roundup((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT;
+       return __pa(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT;
 }
 
 #endif
@@ -201,10 +204,10 @@ static void cpa_flush_range(unsigned long start, int numpages, int cache)
        }
 }
 
-static void cpa_flush_array(unsigned long *start, int numpages, int cache)
+static void cpa_flush_array(unsigned long *start, int numpages, int cache,
+                           int in_flags, struct page **pages)
 {
        unsigned int i, level;
-       unsigned long *addr;
 
        BUG_ON(irqs_disabled());
 
@@ -225,14 +228,22 @@ static void cpa_flush_array(unsigned long *start, int numpages, int cache)
         * will cause all other CPUs to flush the same
         * cachelines:
         */
-       for (i = 0, addr = start; i < numpages; i++, addr++) {
-               pte_t *pte = lookup_address(*addr, &level);
+       for (i = 0; i < numpages; i++) {
+               unsigned long addr;
+               pte_t *pte;
+
+               if (in_flags & CPA_PAGES_ARRAY)
+                       addr = (unsigned long)page_address(pages[i]);
+               else
+                       addr = start[i];
+
+               pte = lookup_address(addr, &level);
 
                /*
                 * Only flush present addresses:
                 */
                if (pte && (pte_val(*pte) & _PAGE_PRESENT))
-                       clflush_cache_range((void *) *addr, PAGE_SIZE);
+                       clflush_cache_range((void *)addr, PAGE_SIZE);
        }
 }
 
@@ -482,6 +493,13 @@ static int split_large_page(pte_t *kpte, unsigned long address)
        pbase = (pte_t *)page_address(base);
        paravirt_alloc_pte(&init_mm, page_to_pfn(base));
        ref_prot = pte_pgprot(pte_clrhuge(*kpte));
+       /*
+        * If we ever want to utilize the PAT bit, we need to
+        * update this function to make sure it's converted from
+        * bit 12 to bit 7 when we cross from the 2MB level to
+        * the 4K level:
+        */
+       WARN_ON_ONCE(pgprot_val(ref_prot) & _PAGE_PAT_LARGE);
 
 #ifdef CONFIG_X86_64
        if (level == PG_LEVEL_1G) {
@@ -508,18 +526,24 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 #endif
 
        /*
-        * Install the new, split up pagetable. Important details here:
+        * Install the new, split up pagetable.
         *
-        * On Intel the NX bit of all levels must be cleared to make a
-        * page executable. See section 4.13.2 of Intel 64 and IA-32
-        * Architectures Software Developer's Manual).
+        * We use the standard kernel pagetable protections for the new
+        * pagetable protections, the actual ptes set above control the
+        * primary protection behavior:
+        */
+       __set_pmd_pte(kpte, address, mk_pte(base, __pgprot(_KERNPG_TABLE)));
+
+       /*
+        * Intel Atom errata AAH41 workaround.
         *
-        * Mark the entry present. The current mapping might be
-        * set to not present, which we preserved above.
+        * The real fix should be in hw or in a microcode update, but
+        * we also probabilistically try to reduce the window of having
+        * a large TLB mixed with 4K TLBs while instruction fetches are
+        * going on.
         */
-       ref_prot = pte_pgprot(pte_mkexec(pte_clrhuge(*kpte)));
-       pgprot_val(ref_prot) |= _PAGE_PRESENT;
-       __set_pmd_pte(kpte, address, mk_pte(base, ref_prot));
+       __flush_tlb_all();
+
        base = NULL;
 
 out_unlock:
@@ -571,11 +595,12 @@ static int __change_page_attr(struct cpa_data *cpa, int primary)
        unsigned int level;
        pte_t *kpte, old_pte;
 
-       if (cpa->flags & CPA_ARRAY)
+       if (cpa->flags & CPA_PAGES_ARRAY)
+               address = (unsigned long)page_address(cpa->pages[cpa->curpage]);
+       else if (cpa->flags & CPA_ARRAY)
                address = cpa->vaddr[cpa->curpage];
        else
                address = *cpa->vaddr;
-
 repeat:
        kpte = lookup_address(address, &level);
        if (!kpte)
@@ -675,7 +700,9 @@ static int cpa_process_alias(struct cpa_data *cpa)
         * No need to redo, when the primary call touched the direct
         * mapping already:
         */
-       if (cpa->flags & CPA_ARRAY)
+       if (cpa->flags & CPA_PAGES_ARRAY)
+               vaddr = (unsigned long)page_address(cpa->pages[cpa->curpage]);
+       else if (cpa->flags & CPA_ARRAY)
                vaddr = cpa->vaddr[cpa->curpage];
        else
                vaddr = *cpa->vaddr;
@@ -686,7 +713,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
                alias_cpa = *cpa;
                temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
                alias_cpa.vaddr = &temp_cpa_vaddr;
-               alias_cpa.flags &= ~CPA_ARRAY;
+               alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
 
 
                ret = __change_page_attr_set_clr(&alias_cpa, 0);
@@ -699,7 +726,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
         * No need to redo, when the primary call touched the high
         * mapping already:
         */
-       if (within(vaddr, (unsigned long) _text, (unsigned long) _end))
+       if (within(vaddr, (unsigned long) _text, _brk_end))
                return 0;
 
        /*
@@ -712,7 +739,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
        alias_cpa = *cpa;
        temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base;
        alias_cpa.vaddr = &temp_cpa_vaddr;
-       alias_cpa.flags &= ~CPA_ARRAY;
+       alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
 
        /*
         * The high mapping range is imprecise, so ignore the return value.
@@ -733,7 +760,7 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
                 */
                cpa->numpages = numpages;
                /* for array changes, we can't use large page */
-               if (cpa->flags & CPA_ARRAY)
+               if (cpa->flags & (CPA_ARRAY | CPA_PAGES_ARRAY))
                        cpa->numpages = 1;
 
                if (!debug_pagealloc)
@@ -757,7 +784,7 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
                 */
                BUG_ON(cpa->numpages > numpages);
                numpages -= cpa->numpages;
-               if (cpa->flags & CPA_ARRAY)
+               if (cpa->flags & (CPA_PAGES_ARRAY | CPA_ARRAY))
                        cpa->curpage++;
                else
                        *cpa->vaddr += cpa->numpages * PAGE_SIZE;
@@ -774,7 +801,8 @@ static inline int cache_attr(pgprot_t attr)
 
 static int change_page_attr_set_clr(unsigned long *addr, int numpages,
                                    pgprot_t mask_set, pgprot_t mask_clr,
-                                   int force_split, int array)
+                                   int force_split, int in_flag,
+                                   struct page **pages)
 {
        struct cpa_data cpa;
        int ret, cache, checkalias;
@@ -789,15 +817,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
                return 0;
 
        /* Ensure we are PAGE_SIZE aligned */
-       if (!array) {
-               if (*addr & ~PAGE_MASK) {
-                       *addr &= PAGE_MASK;
-                       /*
-                        * People should not be passing in unaligned addresses:
-                        */
-                       WARN_ON_ONCE(1);
-               }
-       } else {
+       if (in_flag & CPA_ARRAY) {
                int i;
                for (i = 0; i < numpages; i++) {
                        if (addr[i] & ~PAGE_MASK) {
@@ -805,6 +825,18 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
                                WARN_ON_ONCE(1);
                        }
                }
+       } else if (!(in_flag & CPA_PAGES_ARRAY)) {
+               /*
+                * in_flag of CPA_PAGES_ARRAY implies it is aligned.
+                * No need to cehck in that case
+                */
+               if (*addr & ~PAGE_MASK) {
+                       *addr &= PAGE_MASK;
+                       /*
+                        * People should not be passing in unaligned addresses:
+                        */
+                       WARN_ON_ONCE(1);
+               }
        }
 
        /* Must avoid aliasing mappings in the highmem code */
@@ -813,6 +845,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
        vm_unmap_aliases();
 
        cpa.vaddr = addr;
+       cpa.pages = pages;
        cpa.numpages = numpages;
        cpa.mask_set = mask_set;
        cpa.mask_clr = mask_clr;
@@ -820,8 +853,8 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
        cpa.curpage = 0;
        cpa.force_split = force_split;
 
-       if (array)
-               cpa.flags |= CPA_ARRAY;
+       if (in_flag & (CPA_ARRAY | CPA_PAGES_ARRAY))
+               cpa.flags |= in_flag;
 
        /* No alias checking for _NX bit modifications */
        checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX;
@@ -847,9 +880,10 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
         * wbindv):
         */
        if (!ret && cpu_has_clflush) {
-               if (cpa.flags & CPA_ARRAY)
-                       cpa_flush_array(addr, numpages, cache);
-               else
+               if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) {
+                       cpa_flush_array(addr, numpages, cache,
+                                       cpa.flags, pages);
+               } else
                        cpa_flush_range(*addr, numpages, cache);
        } else
                cpa_flush_all(cache);
@@ -862,14 +896,28 @@ static inline int change_page_attr_set(unsigned long *addr, int numpages,
                                       pgprot_t mask, int array)
 {
        return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0,
-               array);
+               (array ? CPA_ARRAY : 0), NULL);
 }
 
 static inline int change_page_attr_clear(unsigned long *addr, int numpages,
                                         pgprot_t mask, int array)
 {
        return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0,
-               array);
+               (array ? CPA_ARRAY : 0), NULL);
+}
+
+static inline int cpa_set_pages_array(struct page **pages, int numpages,
+                                      pgprot_t mask)
+{
+       return change_page_attr_set_clr(NULL, numpages, mask, __pgprot(0), 0,
+               CPA_PAGES_ARRAY, pages);
+}
+
+static inline int cpa_clear_pages_array(struct page **pages, int numpages,
+                                        pgprot_t mask)
+{
+       return change_page_attr_set_clr(NULL, numpages, __pgprot(0), mask, 0,
+               CPA_PAGES_ARRAY, pages);
 }
 
 int _set_memory_uc(unsigned long addr, int numpages)
@@ -883,71 +931,94 @@ int _set_memory_uc(unsigned long addr, int numpages)
 
 int set_memory_uc(unsigned long addr, int numpages)
 {
+       int ret;
+
        /*
         * for now UC MINUS. see comments in ioremap_nocache()
         */
-       if (reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
-                           _PAGE_CACHE_UC_MINUS, NULL))
-               return -EINVAL;
+       ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
+                           _PAGE_CACHE_UC_MINUS, NULL);
+       if (ret)
+               goto out_err;
 
-       return _set_memory_uc(addr, numpages);
+       ret = _set_memory_uc(addr, numpages);
+       if (ret)
+               goto out_free;
+
+       return 0;
+
+out_free:
+       free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
+out_err:
+       return ret;
 }
 EXPORT_SYMBOL(set_memory_uc);
 
 int set_memory_array_uc(unsigned long *addr, int addrinarray)
 {
-       unsigned long start;
-       unsigned long end;
-       int i;
+       int i, j;
+       int ret;
+
        /*
         * for now UC MINUS. see comments in ioremap_nocache()
         */
        for (i = 0; i < addrinarray; i++) {
-               start = __pa(addr[i]);
-               for (end = start + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) {
-                       if (end != __pa(addr[i + 1]))
-                               break;
-                       i++;
-               }
-               if (reserve_memtype(start, end, _PAGE_CACHE_UC_MINUS, NULL))
-                       goto out;
+               ret = reserve_memtype(__pa(addr[i]), __pa(addr[i]) + PAGE_SIZE,
+                                       _PAGE_CACHE_UC_MINUS, NULL);
+               if (ret)
+                       goto out_free;
        }
 
-       return change_page_attr_set(addr, addrinarray,
+       ret = change_page_attr_set(addr, addrinarray,
                                    __pgprot(_PAGE_CACHE_UC_MINUS), 1);
-out:
-       for (i = 0; i < addrinarray; i++) {
-               unsigned long tmp = __pa(addr[i]);
-
-               if (tmp == start)
-                       break;
-               for (end = tmp + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) {
-                       if (end != __pa(addr[i + 1]))
-                               break;
-                       i++;
-               }
-               free_memtype(tmp, end);
-       }
-       return -EINVAL;
+       if (ret)
+               goto out_free;
+
+       return 0;
+
+out_free:
+       for (j = 0; j < i; j++)
+               free_memtype(__pa(addr[j]), __pa(addr[j]) + PAGE_SIZE);
+
+       return ret;
 }
 EXPORT_SYMBOL(set_memory_array_uc);
 
 int _set_memory_wc(unsigned long addr, int numpages)
 {
-       return change_page_attr_set(&addr, numpages,
+       int ret;
+       ret = change_page_attr_set(&addr, numpages,
+                                   __pgprot(_PAGE_CACHE_UC_MINUS), 0);
+
+       if (!ret) {
+               ret = change_page_attr_set(&addr, numpages,
                                    __pgprot(_PAGE_CACHE_WC), 0);
+       }
+       return ret;
 }
 
 int set_memory_wc(unsigned long addr, int numpages)
 {
+       int ret;
+
        if (!pat_enabled)
                return set_memory_uc(addr, numpages);
 
-       if (reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
-               _PAGE_CACHE_WC, NULL))
-               return -EINVAL;
+       ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
+               _PAGE_CACHE_WC, NULL);
+       if (ret)
+               goto out_err;
+
+       ret = _set_memory_wc(addr, numpages);
+       if (ret)
+               goto out_free;
 
-       return _set_memory_wc(addr, numpages);
+       return 0;
+
+out_free:
+       free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
+out_err:
+       return ret;
 }
 EXPORT_SYMBOL(set_memory_wc);
 
@@ -959,29 +1030,31 @@ int _set_memory_wb(unsigned long addr, int numpages)
 
 int set_memory_wb(unsigned long addr, int numpages)
 {
-       free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
+       int ret;
+
+       ret = _set_memory_wb(addr, numpages);
+       if (ret)
+               return ret;
 
-       return _set_memory_wb(addr, numpages);
+       free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
+       return 0;
 }
 EXPORT_SYMBOL(set_memory_wb);
 
 int set_memory_array_wb(unsigned long *addr, int addrinarray)
 {
        int i;
+       int ret;
 
-       for (i = 0; i < addrinarray; i++) {
-               unsigned long start = __pa(addr[i]);
-               unsigned long end;
-
-               for (end = start + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) {
-                       if (end != __pa(addr[i + 1]))
-                               break;
-                       i++;
-               }
-               free_memtype(start, end);
-       }
-       return change_page_attr_clear(addr, addrinarray,
+       ret = change_page_attr_clear(addr, addrinarray,
                                      __pgprot(_PAGE_CACHE_MASK), 1);
+       if (ret)
+               return ret;
+
+       for (i = 0; i < addrinarray; i++)
+               free_memtype(__pa(addr[i]), __pa(addr[i]) + PAGE_SIZE);
+
+       return 0;
 }
 EXPORT_SYMBOL(set_memory_array_wb);
 
@@ -1017,7 +1090,7 @@ int set_memory_np(unsigned long addr, int numpages)
 int set_memory_4k(unsigned long addr, int numpages)
 {
        return change_page_attr_set_clr(&addr, numpages, __pgprot(0),
-                                       __pgprot(0), 1, 0);
+                                       __pgprot(0), 1, 0, NULL);
 }
 
 int set_pages_uc(struct page *page, int numpages)
@@ -1028,6 +1101,35 @@ int set_pages_uc(struct page *page, int numpages)
 }
 EXPORT_SYMBOL(set_pages_uc);
 
+int set_pages_array_uc(struct page **pages, int addrinarray)
+{
+       unsigned long start;
+       unsigned long end;
+       int i;
+       int free_idx;
+
+       for (i = 0; i < addrinarray; i++) {
+               start = (unsigned long)page_address(pages[i]);
+               end = start + PAGE_SIZE;
+               if (reserve_memtype(start, end, _PAGE_CACHE_UC_MINUS, NULL))
+                       goto err_out;
+       }
+
+       if (cpa_set_pages_array(pages, addrinarray,
+                       __pgprot(_PAGE_CACHE_UC_MINUS)) == 0) {
+               return 0; /* Success */
+       }
+err_out:
+       free_idx = i;
+       for (i = 0; i < free_idx; i++) {
+               start = (unsigned long)page_address(pages[i]);
+               end = start + PAGE_SIZE;
+               free_memtype(start, end);
+       }
+       return -EINVAL;
+}
+EXPORT_SYMBOL(set_pages_array_uc);
+
 int set_pages_wb(struct page *page, int numpages)
 {
        unsigned long addr = (unsigned long)page_address(page);
@@ -1036,6 +1138,28 @@ int set_pages_wb(struct page *page, int numpages)
 }
 EXPORT_SYMBOL(set_pages_wb);
 
+int set_pages_array_wb(struct page **pages, int addrinarray)
+{
+       int retval;
+       unsigned long start;
+       unsigned long end;
+       int i;
+
+       retval = cpa_clear_pages_array(pages, addrinarray,
+                       __pgprot(_PAGE_CACHE_MASK));
+       if (retval)
+               return retval;
+
+       for (i = 0; i < addrinarray; i++) {
+               start = (unsigned long)page_address(pages[i]);
+               end = start + PAGE_SIZE;
+               free_memtype(start, end);
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL(set_pages_array_wb);
+
 int set_pages_x(struct page *page, int numpages)
 {
        unsigned long addr = (unsigned long)page_address(page);