[PATCH] ppc32 8xx: update_mmu_cache() needs unconditional tlbie
Marcelo Tosatti [Mon, 14 Nov 2005 07:38:31 +0000 (05:38 -0200)]
Currently 8xx fails to boot due to endless pagefaults.

It seems the bug is exposed by the change which avoids flushing the
TLB when not necessary (in case the pte has not changed), introduced
recently:

__handle_mm_fault():

        entry = pte_mkyoung(entry);
        if (!pte_same(old_entry, entry)) {
                ptep_set_access_flags(vma, address, pte, entry, write_access);
                update_mmu_cache(vma, address, entry);
                lazy_mmu_prot_update(entry);
        } else {
                /*
                 * This is needed only for protection faults but the arch code
                 * is not yet telling us if this is a protection fault or not.
                 * This still avoids useless tlb flushes for .text page faults
                 * with threads.
                 */
                if (write_access)
                        flush_tlb_page(vma, address);
        }

The "update_mmu_cache()" call was unconditional before, which caused the TLB
to be flushed by:

        if (pfn_valid(pfn)) {
                struct page *page = pfn_to_page(pfn);
                if (!PageReserved(page)
                    && !test_bit(PG_arch_1, &page->flags)) {
                        if (vma->vm_mm == current->active_mm) {
#ifdef CONFIG_8xx
                        /* On 8xx, cache control instructions (particularly
                         * "dcbst" from flush_dcache_icache) fault as write
                         * operation if there is an unpopulated TLB entry
                         * for the address in question. To workaround that,
                         * we invalidate the TLB here, thus avoiding dcbst
                         * misbehaviour.
                         */
                                _tlbie(address);
#endif
                                __flush_dcache_icache((void *) address);
                        } else
                                flush_dcache_icache_page(page);
                        set_bit(PG_arch_1, &page->flags);
                }

Which worked due to pure luck: PG_arch_1 was always unset before, but
now it isn't.

The root of the problem is the set of changes to the 8xx TLB handlers introduced
during v2.6. What happens is that the TLBMiss handlers load the zeroed pte into
the TLB, causing the TLBError handler to be invoked (that's two TLB faults per
pagefault), which then jumps to the generic MM code to set up the pte.

The bug is that the zeroed TLB entry is not invalidated (the same reason
for the "dcbst" misbehaviour), resulting in infinite TLBError faults.

The "two exception" approach requires a TLB flush (to nuke the zeroed TLB
entry) at each PTE update for correct behaviour:

Signed-off-by: Marcelo Tosatti <marcelo.tosatti@cyclades.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>

arch/ppc/mm/init.c

index 99b48ab..45f0782 100644 (file)
@@ -597,21 +597,20 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
 
        if (pfn_valid(pfn)) {
                struct page *page = pfn_to_page(pfn);
-               if (!PageReserved(page)
-                   && !test_bit(PG_arch_1, &page->flags)) {
-                       if (vma->vm_mm == current->active_mm) {
 #ifdef CONFIG_8xx
-                       /* On 8xx, cache control instructions (particularly 
-                        * "dcbst" from flush_dcache_icache) fault as write 
-                        * operation if there is an unpopulated TLB entry 
-                        * for the address in question. To workaround that, 
-                        * we invalidate the TLB here, thus avoiding dcbst 
-                        * misbehaviour.
-                        */
-                               _tlbie(address);
+               /* On 8xx, the TLB handlers work in 2 stages:
+                * First, a zeroed entry is loaded by TLBMiss handler,
+                * which causes the TLBError handler to be triggered.
+                * That means the zeroed TLB has to be invalidated
+                * whenever a page miss occurs.
+                */
+               _tlbie(address);
 #endif
+               if (!PageReserved(page)
+                   && !test_bit(PG_arch_1, &page->flags)) {
+                       if (vma->vm_mm == current->active_mm)
                                __flush_dcache_icache((void *) address);
-                       } else
+                       else
                                flush_dcache_icache_page(page);
                        set_bit(PG_arch_1, &page->flags);
                }