GRU driver: minor updates
Jack Steiner [Thu, 16 Oct 2008 05:05:13 +0000 (22:05 -0700)]
A few minor updates for the GRU driver.
- documentation changes found in code reviews
- changes to #ifdefs to make them recognized by "unifdef"
  (used in simulator testing)
- change GRU context load/unload to prefetch data

[akpm@linux-foundation.org: fix typo in comment]
Signed-off-by: Jack Steiner <steiner@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

drivers/misc/sgi-gru/gru.h
drivers/misc/sgi-gru/gru_instructions.h
drivers/misc/sgi-gru/grufault.c
drivers/misc/sgi-gru/grufile.c
drivers/misc/sgi-gru/gruhandles.h
drivers/misc/sgi-gru/grukservices.c
drivers/misc/sgi-gru/grumain.c

index 40df7cb..f93f03a 100644 (file)
@@ -30,9 +30,9 @@
 /*
  * Size used to map GRU GSeg
  */
-#if defined CONFIG_IA64
+#if defined(CONFIG_IA64)
 #define GRU_GSEG_PAGESIZE      (256 * 1024UL)
-#elif defined CONFIG_X86_64
+#elif defined(CONFIG_X86_64)
 #define GRU_GSEG_PAGESIZE      (256 * 1024UL)          /* ZZZ 2MB ??? */
 #else
 #error "Unsupported architecture"
index 0dc3622..48762e7 100644 (file)
@@ -26,7 +26,7 @@
  * Architecture dependent functions
  */
 
-#if defined CONFIG_IA64
+#if defined(CONFIG_IA64)
 #include <linux/compiler.h>
 #include <asm/intrinsics.h>
 #define __flush_cache(p)               ia64_fc(p)
@@ -36,7 +36,7 @@
                        barrier();                                      \
                        *((volatile int *)(p)) = v; /* force st.rel */  \
                } while (0)
-#elif defined CONFIG_X86_64
+#elif defined(CONFIG_X86_64)
 #define __flush_cache(p)               clflush(p)
 #define gru_ordered_store_int(p,v)                                     \
                do {                                                    \
@@ -299,6 +299,7 @@ static inline void gru_flush_cache(void *p)
 static inline void gru_start_instruction(struct gru_instruction *ins, int op32)
 {
        gru_ordered_store_int(ins, op32);
+       gru_flush_cache(ins);
 }
 
 
@@ -604,8 +605,9 @@ static inline int gru_get_cb_substatus(void *cb)
 static inline int gru_check_status(void *cb)
 {
        struct gru_control_block_status *cbs = (void *)cb;
-       int ret = cbs->istatus;
+       int ret;
 
+       ret = cbs->istatus;
        if (ret == CBS_CALL_OS)
                ret = gru_check_status_proc(cb);
        return ret;
@@ -617,7 +619,7 @@ static inline int gru_check_status(void *cb)
 static inline int gru_wait(void *cb)
 {
        struct gru_control_block_status *cbs = (void *)cb;
-       int ret = cbs->istatus;;
+       int ret = cbs->istatus;
 
        if (ret != CBS_IDLE)
                ret = gru_wait_proc(cb);
index 3d33015..8c389d6 100644 (file)
@@ -214,12 +214,14 @@ static int non_atomic_pte_lookup(struct vm_area_struct *vma,
 }
 
 /*
- *
  * atomic_pte_lookup
  *
  * Convert a user virtual address to a physical address
  * Only supports Intel large pages (2MB only) on x86_64.
  *     ZZZ - hugepage support is incomplete
+ *
+ * NOTE: mmap_sem is already held on entry to this function. This
+ * guarantees existence of the page tables.
  */
 static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr,
        int write, unsigned long *paddr, int *pageshift)
@@ -229,9 +231,6 @@ static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr,
        pud_t *pudp;
        pte_t pte;
 
-       WARN_ON(irqs_disabled());               /* ZZZ debug */
-
-       local_irq_disable();
        pgdp = pgd_offset(vma->vm_mm, vaddr);
        if (unlikely(pgd_none(*pgdp)))
                goto err;
@@ -250,8 +249,6 @@ static int atomic_pte_lookup(struct vm_area_struct *vma, unsigned long vaddr,
 #endif
                pte = *pte_offset_kernel(pmdp, vaddr);
 
-       local_irq_enable();
-
        if (unlikely(!pte_present(pte) ||
                     (write && (!pte_write(pte) || !pte_dirty(pte)))))
                return 1;
@@ -324,6 +321,7 @@ static int gru_try_dropin(struct gru_thread_state *gts,
         * Atomic lookup is faster & usually works even if called in non-atomic
         * context.
         */
+       rmb();  /* Must check ms_range_active before loading PTEs */
        ret = atomic_pte_lookup(vma, vaddr, write, &paddr, &pageshift);
        if (ret) {
                if (!cb)
@@ -543,6 +541,7 @@ int gru_get_exception_detail(unsigned long arg)
                ucbnum = get_cb_number((void *)excdet.cb);
                cbrnum = thread_cbr_number(gts, ucbnum);
                cbe = get_cbe_by_index(gts->ts_gru, cbrnum);
+               prefetchw(cbe);         /* Harmless on hardware, required for emulator */
                excdet.opc = cbe->opccpy;
                excdet.exopc = cbe->exopccpy;
                excdet.ecause = cbe->ecause;
index d61cee7..5c027b6 100644 (file)
@@ -113,7 +113,7 @@ static int gru_file_mmap(struct file *file, struct vm_area_struct *vma)
                return -EPERM;
 
        if (vma->vm_start & (GRU_GSEG_PAGESIZE - 1) ||
-                       vma->vm_end & (GRU_GSEG_PAGESIZE - 1))
+                               vma->vm_end & (GRU_GSEG_PAGESIZE - 1))
                return -EINVAL;
 
        vma->vm_flags |=
@@ -398,6 +398,12 @@ static int __init gru_init(void)
        irq = get_base_irq();
        for (chip = 0; chip < GRU_CHIPLETS_PER_BLADE; chip++) {
                ret = request_irq(irq + chip, gru_intr, 0, id, NULL);
+               /* TODO: fix irq handling on x86. For now ignore failures because
+                * interrupts are not required & not yet fully supported */
+               if (ret) {
+                       printk("!!!WARNING: GRU ignoring request failure!!!\n");
+                       ret = 0;
+               }
                if (ret) {
                        printk(KERN_ERR "%s: request_irq failed\n",
                               GRU_DRIVER_ID_STR);
index d16031d..b63018d 100644 (file)
 #define GSEGPOFF(h)            ((h) & (GRU_SIZE - 1))
 
 /* Convert an arbitrary handle address to the beginning of the GRU segment */
-#ifndef __PLUGIN__
 #define GRUBASE(h)             ((void *)((unsigned long)(h) & ~(GRU_SIZE - 1)))
-#else
-extern void *gmu_grubase(void *h);
-#define GRUBASE(h)             gmu_grubase(h)
-#endif
 
 /* General addressing macros. */
 static inline void *get_gseg_base_address(void *base, int ctxnum)
index 08d29cd..880c55d 100644 (file)
@@ -122,6 +122,7 @@ int gru_get_cb_exception_detail(void *cb,
        struct gru_control_block_extended *cbe;
 
        cbe = get_cbe(GRUBASE(cb), get_cb_number(cb));
+       prefetchw(cbe);         /* Harmless on hardware, required for emulator */
        excdet->opc = cbe->opccpy;
        excdet->exopc = cbe->exopccpy;
        excdet->ecause = cbe->ecause;
index 0eeb8dd..e11e1ac 100644 (file)
@@ -432,29 +432,35 @@ static inline long gru_copy_handle(void *d, void *s)
        return GRU_HANDLE_BYTES;
 }
 
-/* rewrite in assembly & use lots of prefetch */
-static void gru_load_context_data(void *save, void *grubase, int ctxnum,
-                                 unsigned long cbrmap, unsigned long dsrmap)
+static void gru_prefetch_context(void *gseg, void *cb, void *cbe, unsigned long cbrmap,
+                               unsigned long length)
 {
-       void *gseg, *cb, *cbe;
-       unsigned long length;
        int i, scr;
 
-       gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
-       length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
        prefetch_data(gseg + GRU_DS_BASE, length / GRU_CACHE_LINE_BYTES,
                      GRU_CACHE_LINE_BYTES);
 
-       cb = gseg + GRU_CB_BASE;
-       cbe = grubase + GRU_CBE_BASE;
        for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
                prefetch_data(cb, 1, GRU_CACHE_LINE_BYTES);
                prefetch_data(cbe + i * GRU_HANDLE_STRIDE, 1,
                              GRU_CACHE_LINE_BYTES);
                cb += GRU_HANDLE_STRIDE;
        }
+}
+
+static void gru_load_context_data(void *save, void *grubase, int ctxnum,
+                                 unsigned long cbrmap, unsigned long dsrmap)
+{
+       void *gseg, *cb, *cbe;
+       unsigned long length;
+       int i, scr;
 
+       gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
        cb = gseg + GRU_CB_BASE;
+       cbe = grubase + GRU_CBE_BASE;
+       length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
+       gru_prefetch_context(gseg, cb, cbe, cbrmap, length);
+
        for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
                save += gru_copy_handle(cb, save);
                save += gru_copy_handle(cbe + i * GRU_HANDLE_STRIDE, save);
@@ -472,15 +478,16 @@ static void gru_unload_context_data(void *save, void *grubase, int ctxnum,
        int i, scr;
 
        gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
-
        cb = gseg + GRU_CB_BASE;
        cbe = grubase + GRU_CBE_BASE;
+       length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
+       gru_prefetch_context(gseg, cb, cbe, cbrmap, length);
+
        for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
                save += gru_copy_handle(save, cb);
                save += gru_copy_handle(save, cbe + i * GRU_HANDLE_STRIDE);
                cb += GRU_HANDLE_STRIDE;
        }
-       length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
        memcpy(save, gseg + GRU_DS_BASE, length);
 }