[IA64] improve flush_icache_range()
Zoltan Menyhart [Fri, 3 Jun 2005 12:36:00 +0000 (05:36 -0700)]
Check with PAL to see what the i-cache line size is for
each level of the cache, and so use the correct stride
when flushing the cache.

Acked-by: David Mosberger
Signed-off-by: Tony Luck <tony.luck@intel.com>

arch/ia64/kernel/setup.c
arch/ia64/lib/flush.S

index 2693e15..7fc891a 100644 (file)
@@ -20,6 +20,7 @@
  * 02/01/00 R.Seth     fixed get_cpuinfo for SMP
  * 01/07/99 S.Eranian  added the support for command line argument
  * 06/24/99 W.Drummond added boot_cpu_data.
+ * 05/28/05 Z. Menyhart        Dynamic stride size for "flush_icache_range()"
  */
 #include <linux/config.h>
 #include <linux/module.h>
@@ -83,6 +84,13 @@ EXPORT_SYMBOL(io_space);
 unsigned int num_io_spaces;
 
 /*
+ * "flush_icache_range()" needs to know what processor dependent stride size to use
+ * when it makes i-cache(s) coherent with d-caches.
+ */
+#define        I_CACHE_STRIDE_SHIFT    5       /* Safest way to go: 32 bytes by 32 bytes */
+unsigned long ia64_i_cache_stride_shift = ~0;
+
+/*
  * The merge_mask variable needs to be set to (max(iommu_page_size(iommu)) - 1).  This
  * mask specifies a mask of address bits that must be 0 in order for two buffers to be
  * mergeable by the I/O MMU (i.e., the end address of the first buffer and the start
@@ -626,6 +634,12 @@ setup_per_cpu_areas (void)
        /* start_kernel() requires this... */
 }
 
+/*
+ * Calculate the max. cache line size.
+ *
+ * In addition, the minimum of the i-cache stride sizes is calculated for
+ * "flush_icache_range()".
+ */
 static void
 get_max_cacheline_size (void)
 {
@@ -639,6 +653,8 @@ get_max_cacheline_size (void)
                 printk(KERN_ERR "%s: ia64_pal_cache_summary() failed (status=%ld)\n",
                        __FUNCTION__, status);
                 max = SMP_CACHE_BYTES;
+               /* Safest setup for "flush_icache_range()" */
+               ia64_i_cache_stride_shift = I_CACHE_STRIDE_SHIFT;
                goto out;
         }
 
@@ -647,14 +663,31 @@ get_max_cacheline_size (void)
                                                    &cci);
                if (status != 0) {
                        printk(KERN_ERR
-                              "%s: ia64_pal_cache_config_info(l=%lu) failed (status=%ld)\n",
+                              "%s: ia64_pal_cache_config_info(l=%lu, 2) failed (status=%ld)\n",
                               __FUNCTION__, l, status);
                        max = SMP_CACHE_BYTES;
+                       /* The safest setup for "flush_icache_range()" */
+                       cci.pcci_stride = I_CACHE_STRIDE_SHIFT;
+                       cci.pcci_unified = 1;
                }
                line_size = 1 << cci.pcci_line_size;
                if (line_size > max)
                        max = line_size;
-        }
+               if (!cci.pcci_unified) {
+                       status = ia64_pal_cache_config_info(l,
+                                                   /* cache_type (instruction)= */ 1,
+                                                   &cci);
+                       if (status != 0) {
+                               printk(KERN_ERR
+                               "%s: ia64_pal_cache_config_info(l=%lu, 1) failed (status=%ld)\n",
+                                       __FUNCTION__, l, status);
+                               /* The safest setup for "flush_icache_range()" */
+                               cci.pcci_stride = I_CACHE_STRIDE_SHIFT;
+                       }
+               }
+               if (cci.pcci_stride < ia64_i_cache_stride_shift)
+                       ia64_i_cache_stride_shift = cci.pcci_stride;
+       }
   out:
        if (max > ia64_max_cacheline_size)
                ia64_max_cacheline_size = max;
index a1af914..3e2cfa2 100644 (file)
@@ -3,37 +3,59 @@
  *
  * Copyright (C) 1999-2001, 2005 Hewlett-Packard Co
  *     David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * 05/28/05 Zoltan Menyhart    Dynamic stride size
  */
+
 #include <asm/asmmacro.h>
-#include <asm/page.h>
+
 
        /*
         * flush_icache_range(start,end)
-        *      Must flush range from start to end-1 but nothing else (need to
+        *
+        *      Make i-cache(s) coherent with d-caches.
+        *
+        *      Must deal with range from start to end-1 but nothing else (need to
         *      be careful not to touch addresses that may be unmapped).
+        *
+        *      Note: "in0" and "in1" are preserved for debugging purposes.
         */
 GLOBAL_ENTRY(flush_icache_range)
+
        .prologue
-       alloc r2=ar.pfs,2,0,0,0
-       sub r8=in1,in0,1
+       alloc   r2=ar.pfs,2,0,0,0
+       movl    r3=ia64_i_cache_stride_shift
+       mov     r21=1
+       ;;
+       ld8     r20=[r3]                // r20: stride shift
+       sub     r22=in1,r0,1            // last byte address
        ;;
-       shr.u r8=r8,5                   // we flush 32 bytes per iteration
-       .save ar.lc, r3
-       mov r3=ar.lc                    // save ar.lc
+       shr.u   r23=in0,r20             // start / (stride size)
+       shr.u   r22=r22,r20             // (last byte address) / (stride size)
+       shl     r21=r21,r20             // r21: stride size of the i-cache(s)
+       ;;
+       sub     r8=r22,r23              // number of strides - 1
+       shl     r24=r23,r20             // r24: addresses for "fc.i" =
+                                       //      "start" rounded down to stride boundary
+       .save   ar.lc,r3
+       mov     r3=ar.lc                // save ar.lc
        ;;
 
        .body
-
-       mov ar.lc=r8
+       mov     ar.lc=r8
        ;;
-.Loop: fc.i in0                        // issuable on M2 only
-       add in0=32,in0
+       /*
+        * 32 byte aligned loop, even number of (actually 2) bundles
+        */
+.Loop: fc.i    r24                     // issuable on M0 only
+       add     r24=r21,r24             // we flush "stride size" bytes per iteration
+       nop.i   0
        br.cloop.sptk.few .Loop
        ;;
        sync.i
        ;;
        srlz.i
        ;;
-       mov ar.lc=r3                    // restore ar.lc
+       mov     ar.lc=r3                // restore ar.lc
        br.ret.sptk.many rp
 END(flush_icache_range)