[IA64-SGI] - New SN hardware support - ptc_fixes
Jack Steiner [Thu, 11 Aug 2005 17:28:00 +0000 (10:28 -0700)]
Shub2 provides a much improved mechanism for issuing internode
TLB purges. Add code to support the newer mechanism. There is also
some debug code (disabled) that is useful for testing.

Collect statistics on the number, type & duration of TLB purges.
This data will be useful for making future improvements in the algorithms.

Signed-off-by: Jack Steiner <steiner@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>

arch/ia64/sn/kernel/setup.c
arch/ia64/sn/kernel/sn2/ptc_deadlock.S
arch/ia64/sn/kernel/sn2/sn2_smp.c
include/asm-ia64/sn/nodepda.h

index ed77715..6648deb 100644 (file)
@@ -403,6 +403,7 @@ static void __init sn_init_pdas(char **cmdline_p)
                memset(nodepdaindr[cnode], 0, sizeof(nodepda_t));
                memset(nodepdaindr[cnode]->phys_cpuid, -1,
                    sizeof(nodepdaindr[cnode]->phys_cpuid));
+               spin_lock_init(&nodepdaindr[cnode]->ptc_lock);
        }
 
        /*
index 96cb71d..3fa9506 100644 (file)
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 2000-2004 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved.
  */
 
 #include <asm/types.h>
@@ -11,7 +11,7 @@
 
 #define DEADLOCKBIT    SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_SHFT
 #define WRITECOUNTMASK SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK
-#define ALIAS_OFFSET   (SH1_PIO_WRITE_STATUS_0_ALIAS-SH1_PIO_WRITE_STATUS_0)
+#define ALIAS_OFFSET   8
 
 
        .global sn2_ptc_deadlock_recovery_core
@@ -36,13 +36,15 @@ sn2_ptc_deadlock_recovery_core:
        extr.u  piowcphy=piowc,0,61;;   // Convert piowc to uncached physical address
        dep     piowcphy=-1,piowcphy,63,1
        movl    mask=WRITECOUNTMASK
+       mov     r8=r0
 
 1:
        add     scr2=ALIAS_OFFSET,piowc // Address of WRITE_STATUS alias register 
-       mov     scr1=7;;                // Clear DEADLOCK, WRITE_ERROR, MULTI_WRITE_ERROR
-       st8.rel [scr2]=scr1;;
+       ;;
+       ld8.acq scr1=[scr2];;
 
 5:     ld8.acq scr1=[piowc];;          // Wait for PIOs to complete.
+       hint    @pause
        and     scr2=scr1,mask;;        // mask of writecount bits
        cmp.ne  p6,p0=zeroval,scr2
 (p6)   br.cond.sptk 5b
@@ -57,6 +59,7 @@ sn2_ptc_deadlock_recovery_core:
        st8.rel [ptc0]=data0            // Write PTC0 & wait for completion.
 
 5:     ld8.acq scr1=[piowcphy];;       // Wait for PIOs to complete.
+       hint    @pause
        and     scr2=scr1,mask;;        // mask of writecount bits
        cmp.ne  p6,p0=zeroval,scr2
 (p6)   br.cond.sptk 5b;;
@@ -67,6 +70,7 @@ sn2_ptc_deadlock_recovery_core:
 (p7)   st8.rel [ptc1]=data1;;          // Now write PTC1.
 
 5:     ld8.acq scr1=[piowcphy];;       // Wait for PIOs to complete.
+       hint    @pause
        and     scr2=scr1,mask;;        // mask of writecount bits
        cmp.ne  p6,p0=zeroval,scr2
 (p6)   br.cond.sptk 5b
@@ -77,6 +81,7 @@ sn2_ptc_deadlock_recovery_core:
        srlz.i;;
        ////////////// END   PHYSICAL MODE ////////////////////
 
+(p8)   add     r8=1,r8
 (p8)   br.cond.spnt 1b;;               // Repeat if DEADLOCK occurred.
 
        br.ret.sptk     rp
index 7af05a7..0a4ee50 100644 (file)
@@ -5,7 +5,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 2000-2004 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved.
  */
 
 #include <linux/init.h>
@@ -20,6 +20,8 @@
 #include <linux/module.h>
 #include <linux/bitops.h>
 #include <linux/nodemask.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
 
 #include <asm/processor.h>
 #include <asm/irq.h>
 #include <asm/sn/nodepda.h>
 #include <asm/sn/rw_mmr.h>
 
-void sn2_ptc_deadlock_recovery(volatile unsigned long *, unsigned long data0, 
-       volatile unsigned long *, unsigned long data1);
+DEFINE_PER_CPU(struct ptc_stats, ptcstats);
+DECLARE_PER_CPU(struct ptc_stats, ptcstats);
 
 static  __cacheline_aligned DEFINE_SPINLOCK(sn2_global_ptc_lock);
 
-static unsigned long sn2_ptc_deadlock_count;
+void sn2_ptc_deadlock_recovery(short *, short, int, volatile unsigned long *, unsigned long data0,
+       volatile unsigned long *, unsigned long data1);
+
+#ifdef DEBUG_PTC
+/*
+ * ptctest:
+ *
+ *     xyz - 3 digit hex number:
+ *             x - Force PTC purges to use shub:
+ *                     0 - no force
+ *                     1 - force
+ *             y - interupt enable
+ *                     0 - disable interrupts
+ *                     1 - leave interuupts enabled
+ *             z - type of lock:
+ *                     0 - global lock
+ *                     1 - node local lock
+ *                     2 - no lock
+ *
+ *     Note: on shub1, only ptctest == 0 is supported. Don't try other values!
+ */
+
+static unsigned int sn2_ptctest = 0;
+
+static int __init ptc_test(char *str)
+{
+       get_option(&str, &sn2_ptctest);
+       return 1;
+}
+__setup("ptctest=", ptc_test);
+
+static inline int ptc_lock(unsigned long *flagp)
+{
+       unsigned long opt = sn2_ptctest & 255;
+
+       switch (opt) {
+       case 0x00:
+               spin_lock_irqsave(&sn2_global_ptc_lock, *flagp);
+               break;
+       case 0x01:
+               spin_lock_irqsave(&sn_nodepda->ptc_lock, *flagp);
+               break;
+       case 0x02:
+               local_irq_save(*flagp);
+               break;
+       case 0x10:
+               spin_lock(&sn2_global_ptc_lock);
+               break;
+       case 0x11:
+               spin_lock(&sn_nodepda->ptc_lock);
+               break;
+       case 0x12:
+               break;
+       default:
+               BUG();
+       }
+       return opt;
+}
+
+static inline void ptc_unlock(unsigned long flags, int opt)
+{
+       switch (opt) {
+       case 0x00:
+               spin_unlock_irqrestore(&sn2_global_ptc_lock, flags);
+               break;
+       case 0x01:
+               spin_unlock_irqrestore(&sn_nodepda->ptc_lock, flags);
+               break;
+       case 0x02:
+               local_irq_restore(flags);
+               break;
+       case 0x10:
+               spin_unlock(&sn2_global_ptc_lock);
+               break;
+       case 0x11:
+               spin_unlock(&sn_nodepda->ptc_lock);
+               break;
+       case 0x12:
+               break;
+       default:
+               BUG();
+       }
+}
+#else
+
+#define sn2_ptctest    0
+
+static inline int ptc_lock(unsigned long *flagp)
+{
+       spin_lock_irqsave(&sn2_global_ptc_lock, *flagp);
+       return 0;
+}
+
+static inline void ptc_unlock(unsigned long flags, int opt)
+{
+       spin_unlock_irqrestore(&sn2_global_ptc_lock, flags);
+}
+#endif
+
+struct ptc_stats {
+       unsigned long ptc_l;
+       unsigned long change_rid;
+       unsigned long shub_ptc_flushes;
+       unsigned long nodes_flushed;
+       unsigned long deadlocks;
+       unsigned long lock_itc_clocks;
+       unsigned long shub_itc_clocks;
+       unsigned long shub_itc_clocks_max;
+};
 
 static inline unsigned long wait_piowc(void)
 {
@@ -89,9 +199,9 @@ void
 sn2_global_tlb_purge(unsigned long start, unsigned long end,
                     unsigned long nbits)
 {
-       int i, shub1, cnode, mynasid, cpu, lcpu = 0, nasid, flushed = 0;
+       int i, opt, shub1, cnode, mynasid, cpu, lcpu = 0, nasid, flushed = 0;
        volatile unsigned long *ptc0, *ptc1;
-       unsigned long flags = 0, data0 = 0, data1 = 0;
+       unsigned long itc, itc2, flags, data0 = 0, data1 = 0;
        struct mm_struct *mm = current->active_mm;
        short nasids[MAX_NUMNODES], nix;
        nodemask_t nodes_flushed;
@@ -114,16 +224,19 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end,
                        start += (1UL << nbits);
                } while (start < end);
                ia64_srlz_i();
+               __get_cpu_var(ptcstats).ptc_l++;
                preempt_enable();
                return;
        }
 
        if (atomic_read(&mm->mm_users) == 1) {
                flush_tlb_mm(mm);
+               __get_cpu_var(ptcstats).change_rid++;
                preempt_enable();
                return;
        }
 
+       itc = ia64_get_itc();
        nix = 0;
        for_each_node_mask(cnode, nodes_flushed)
                nasids[nix++] = cnodeid_to_nasid(cnode);
@@ -148,7 +261,12 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end,
 
        mynasid = get_nasid();
 
-       spin_lock_irqsave(&sn2_global_ptc_lock, flags);
+       itc = ia64_get_itc();
+       opt = ptc_lock(&flags);
+       itc2 = ia64_get_itc();
+       __get_cpu_var(ptcstats).lock_itc_clocks += itc2 - itc;
+       __get_cpu_var(ptcstats).shub_ptc_flushes++;
+       __get_cpu_var(ptcstats).nodes_flushed += nix;
 
        do {
                if (shub1)
@@ -157,7 +275,7 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end,
                        data0 = (data0 & ~SH2_PTC_ADDR_MASK) | (start & SH2_PTC_ADDR_MASK);
                for (i = 0; i < nix; i++) {
                        nasid = nasids[i];
-                       if (unlikely(nasid == mynasid)) {
+                       if ((!(sn2_ptctest & 3)) && unlikely(nasid == mynasid)) {
                                ia64_ptcga(start, nbits << 2);
                                ia64_srlz_i();
                        } else {
@@ -169,18 +287,22 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end,
                                flushed = 1;
                        }
                }
-
                if (flushed
                    && (wait_piowc() &
-                       SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK)) {
-                       sn2_ptc_deadlock_recovery(ptc0, data0, ptc1, data1);
+                               (SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK))) {
+                       sn2_ptc_deadlock_recovery(nasids, nix, mynasid, ptc0, data0, ptc1, data1);
                }
 
                start += (1UL << nbits);
 
        } while (start < end);
 
-       spin_unlock_irqrestore(&sn2_global_ptc_lock, flags);
+       itc2 = ia64_get_itc() - itc2;
+       __get_cpu_var(ptcstats).shub_itc_clocks += itc2;
+       if (itc2 > __get_cpu_var(ptcstats).shub_itc_clocks_max)
+               __get_cpu_var(ptcstats).shub_itc_clocks_max = itc2;
+
+       ptc_unlock(flags, opt);
 
        preempt_enable();
 }
@@ -192,31 +314,29 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end,
  * TLB flush transaction.  The recovery sequence is somewhat tricky & is
  * coded in assembly language.
  */
-void sn2_ptc_deadlock_recovery(volatile unsigned long *ptc0, unsigned long data0,
+void sn2_ptc_deadlock_recovery(short *nasids, short nix, int mynasid, volatile unsigned long *ptc0, unsigned long data0,
        volatile unsigned long *ptc1, unsigned long data1)
 {
        extern void sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long,
                volatile unsigned long *, unsigned long, volatile unsigned long *, unsigned long);
-       int cnode, mycnode, nasid;
-       volatile unsigned long *piows;
-       volatile unsigned long zeroval;
+       short nasid, i;
+       unsigned long *piows, zeroval;
 
-       sn2_ptc_deadlock_count++;
+       __get_cpu_var(ptcstats).deadlocks++;
 
-       piows = pda->pio_write_status_addr;
+       piows = (unsigned long *) pda->pio_write_status_addr;
        zeroval = pda->pio_write_status_val;
 
-       mycnode = numa_node_id();
-
-       for_each_online_node(cnode) {
-               if (is_headless_node(cnode) || cnode == mycnode)
+       for (i=0; i < nix; i++) {
+               nasid = nasids[i];
+               if (!(sn2_ptctest & 3) && nasid == mynasid)
                        continue;
-               nasid = cnodeid_to_nasid(cnode);
                ptc0 = CHANGE_NASID(nasid, ptc0);
                if (ptc1)
                        ptc1 = CHANGE_NASID(nasid, ptc1);
                sn2_ptc_deadlock_recovery_core(ptc0, data0, ptc1, data1, piows, zeroval);
        }
+
 }
 
 /**
@@ -293,3 +413,93 @@ void sn2_send_IPI(int cpuid, int vector, int delivery_mode, int redirect)
 
        sn_send_IPI_phys(nasid, physid, vector, delivery_mode);
 }
+
+#ifdef CONFIG_PROC_FS
+
+#define PTC_BASENAME   "sgi_sn/ptc_statistics"
+
+static void *sn2_ptc_seq_start(struct seq_file *file, loff_t * offset)
+{
+       if (*offset < NR_CPUS)
+               return offset;
+       return NULL;
+}
+
+static void *sn2_ptc_seq_next(struct seq_file *file, void *data, loff_t * offset)
+{
+       (*offset)++;
+       if (*offset < NR_CPUS)
+               return offset;
+       return NULL;
+}
+
+static void sn2_ptc_seq_stop(struct seq_file *file, void *data)
+{
+}
+
+static int sn2_ptc_seq_show(struct seq_file *file, void *data)
+{
+       struct ptc_stats *stat;
+       int cpu;
+
+       cpu = *(loff_t *) data;
+
+       if (!cpu) {
+               seq_printf(file, "# ptc_l change_rid shub_ptc_flushes shub_nodes_flushed deadlocks lock_nsec shub_nsec shub_nsec_max\n");
+               seq_printf(file, "# ptctest %d\n", sn2_ptctest);
+       }
+
+       if (cpu < NR_CPUS && cpu_online(cpu)) {
+               stat = &per_cpu(ptcstats, cpu);
+               seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld\n", cpu, stat->ptc_l,
+                               stat->change_rid, stat->shub_ptc_flushes, stat->nodes_flushed,
+                               stat->deadlocks,
+                               1000 * stat->lock_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec,
+                               1000 * stat->shub_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec,
+                               1000 * stat->shub_itc_clocks_max / per_cpu(cpu_info, cpu).cyc_per_usec);
+       }
+
+       return 0;
+}
+
+static struct seq_operations sn2_ptc_seq_ops = {
+       .start = sn2_ptc_seq_start,
+       .next = sn2_ptc_seq_next,
+       .stop = sn2_ptc_seq_stop,
+       .show = sn2_ptc_seq_show
+};
+
+int sn2_ptc_proc_open(struct inode *inode, struct file *file)
+{
+       return seq_open(file, &sn2_ptc_seq_ops);
+}
+
+static struct file_operations proc_sn2_ptc_operations = {
+       .open = sn2_ptc_proc_open,
+       .read = seq_read,
+       .llseek = seq_lseek,
+       .release = seq_release,
+};
+
+static struct proc_dir_entry *proc_sn2_ptc;
+
+static int __init sn2_ptc_init(void)
+{
+       if (!(proc_sn2_ptc = create_proc_entry(PTC_BASENAME, 0444, NULL))) {
+               printk(KERN_ERR "unable to create %s proc entry", PTC_BASENAME);
+               return -EINVAL;
+       }
+       proc_sn2_ptc->proc_fops = &proc_sn2_ptc_operations;
+       spin_lock_init(&sn2_global_ptc_lock);
+       return 0;
+}
+
+static void __exit sn2_ptc_exit(void)
+{
+       remove_proc_entry(PTC_BASENAME, NULL);
+}
+
+module_init(sn2_ptc_init);
+module_exit(sn2_ptc_exit);
+#endif /* CONFIG_PROC_FS */
+
index 7138b1e..47bb810 100644 (file)
@@ -37,7 +37,6 @@ struct phys_cpuid {
 
 struct nodepda_s {
        void            *pdinfo;        /* Platform-dependent per-node info */
-       spinlock_t              bist_lock;
 
        /*
         * The BTEs on this node are shared by the local cpus
@@ -55,6 +54,8 @@ struct nodepda_s {
         * Array of physical cpu identifiers. Indexed by cpuid.
         */
        struct phys_cpuid       phys_cpuid[NR_CPUS];
+       spinlock_t              ptc_lock ____cacheline_aligned_in_smp;
+       spinlock_t              bist_lock;
 };
 
 typedef struct nodepda_s nodepda_t;