Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6
Linus Torvalds [Mon, 23 May 2011 15:39:24 +0000 (08:39 -0700)]
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6: (27 commits)
  bnx2x: allow device properly initialize after hotplug
  bnx2x: fix DMAE timeout according to hw specifications
  bnx2x: properly handle CFC DEL in cnic flow
  bnx2x: call dev_kfree_skb_any instead of dev_kfree_skb
  net: filter: move forward declarations to avoid compile warnings
  pktgen: refactor pg_init() code
  pktgen: use vzalloc_node() instead of vmalloc_node() + memset()
  net: skb_trim explicitly check the linearity instead of data_len
  ipv4: Give backtrace in ip_rt_bug().
  net: avoid synchronize_rcu() in dev_deactivate_many
  net: remove synchronize_net() from netdev_set_master()
  rtnetlink: ignore NETDEV_RELEASE and NETDEV_JOIN event
  net: rename NETDEV_BONDING_DESLAVE to NETDEV_RELEASE
  bridge: call NETDEV_JOIN notifiers when add a slave
  netpoll: disable netpoll when enslave a device
  macvlan: Forward unicast frames in bridge mode to lowerdev
  net: Remove linux/prefetch.h include from linux/skbuff.h
  ipv4: Include linux/prefetch.h in fib_trie.c
  netlabel: Remove prefetches from list handlers.
  drivers/net: add prefetch header for prefetch users
  ...

Fixed up prefetch parts: removed a few duplicate prefetch.h includes,
fixed the location of the igb prefetch.h, took my version of the
skbuff.h code without the extra parentheses etc.

113 files changed:
arch/sparc/Kconfig
arch/sparc/include/asm/cpudata_32.h
arch/sparc/include/asm/floppy_32.h
arch/sparc/include/asm/io.h
arch/sparc/include/asm/irq_32.h
arch/sparc/include/asm/leon.h
arch/sparc/include/asm/pcic.h
arch/sparc/include/asm/pgtable_32.h
arch/sparc/include/asm/pgtable_64.h
arch/sparc/include/asm/setup.h
arch/sparc/include/asm/smp_32.h
arch/sparc/include/asm/smp_64.h
arch/sparc/include/asm/spinlock_32.h
arch/sparc/include/asm/system_32.h
arch/sparc/include/asm/system_64.h
arch/sparc/include/asm/winmacro.h
arch/sparc/kernel/Makefile
arch/sparc/kernel/cpu.c
arch/sparc/kernel/cpumap.c
arch/sparc/kernel/devices.c
arch/sparc/kernel/ds.c
arch/sparc/kernel/entry.S
arch/sparc/kernel/head_32.S
arch/sparc/kernel/ioport.c
arch/sparc/kernel/irq.h
arch/sparc/kernel/irq_32.c
arch/sparc/kernel/irq_64.c
arch/sparc/kernel/kernel.h
arch/sparc/kernel/leon_kernel.c
arch/sparc/kernel/leon_smp.c
arch/sparc/kernel/mdesc.c
arch/sparc/kernel/of_device_64.c
arch/sparc/kernel/pci_msi.c
arch/sparc/kernel/pcic.c
arch/sparc/kernel/perf_event.c
arch/sparc/kernel/process_32.c
arch/sparc/kernel/prom_32.c
arch/sparc/kernel/setup_32.c
arch/sparc/kernel/setup_64.c
arch/sparc/kernel/smp_32.c
arch/sparc/kernel/smp_64.c
arch/sparc/kernel/sun4c_irq.c
arch/sparc/kernel/sun4d_irq.c
arch/sparc/kernel/sun4d_smp.c
arch/sparc/kernel/sun4m_irq.c
arch/sparc/kernel/sun4m_smp.c
arch/sparc/kernel/sysfs.c
arch/sparc/kernel/time_32.c
arch/sparc/kernel/us2e_cpufreq.c
arch/sparc/kernel/us3_cpufreq.c
arch/sparc/lib/Makefile
arch/sparc/lib/rwsem_32.S [deleted file]
arch/sparc/mm/init_64.c
arch/x86/kernel/cpu/common.c
drivers/block/drbd/drbd_int.h
drivers/dma/ioat/dma.c
drivers/dma/ioat/dma_v2.c
drivers/dma/ioat/dma_v3.c
drivers/ide/ide-acpi.c
drivers/ide/ide-floppy.c
drivers/ide/ide-scan-pci.c
drivers/ide/pmac.c
drivers/infiniband/hw/amso1100/c2.c
drivers/md/bitmap.c
drivers/md/md.c
drivers/md/multipath.c
drivers/md/multipath.h
drivers/md/raid1.c
drivers/md/raid1.h
drivers/md/raid10.c
drivers/md/raid5.c
drivers/net/igb/igb_main.c
fs/compat.c
fs/exec.c
fs/gfs2/bmap.c
fs/gfs2/log.c
fs/gfs2/rgrp.c
fs/nilfs2/alloc.c
fs/nilfs2/bmap.c
fs/nilfs2/btnode.c
fs/nilfs2/btnode.h
fs/nilfs2/btree.c
fs/nilfs2/cpfile.c
fs/nilfs2/dat.c
fs/nilfs2/file.c
fs/nilfs2/gcinode.c
fs/nilfs2/ifile.c
fs/nilfs2/inode.c
fs/nilfs2/ioctl.c
fs/nilfs2/mdt.c
fs/nilfs2/mdt.h
fs/nilfs2/nilfs.h
fs/nilfs2/page.c
fs/nilfs2/page.h
fs/nilfs2/recovery.c
fs/nilfs2/segbuf.c
fs/nilfs2/segment.c
fs/nilfs2/segment.h
fs/nilfs2/sufile.c
fs/nilfs2/sufile.h
fs/nilfs2/super.c
fs/nilfs2/the_nilfs.c
fs/nilfs2/the_nilfs.h
include/linux/binfmts.h
include/linux/ide.h
include/linux/nilfs2_fs.h
include/linux/skbuff.h
include/net/mac80211.h
init/Kconfig
kernel/sched.c
net/ipv4/fib_trie.c
tools/testing/ktest/ktest.pl
tools/testing/ktest/sample.conf

index e560d10..63a027c 100644 (file)
@@ -25,6 +25,10 @@ config SPARC
        select HAVE_DMA_ATTRS
        select HAVE_DMA_API_DEBUG
        select HAVE_ARCH_JUMP_LABEL
+       select HAVE_GENERIC_HARDIRQS
+       select GENERIC_HARDIRQS_NO_DEPRECATED
+       select GENERIC_IRQ_SHOW
+       select USE_GENERIC_SMP_HELPERS if SMP
 
 config SPARC32
        def_bool !64BIT
@@ -43,15 +47,12 @@ config SPARC64
        select HAVE_DYNAMIC_FTRACE
        select HAVE_FTRACE_MCOUNT_RECORD
        select HAVE_SYSCALL_TRACEPOINTS
-       select USE_GENERIC_SMP_HELPERS if SMP
        select RTC_DRV_CMOS
        select RTC_DRV_BQ4802
        select RTC_DRV_SUN4V
        select RTC_DRV_STARFIRE
        select HAVE_PERF_EVENTS
        select PERF_USE_VMALLOC
-       select HAVE_GENERIC_HARDIRQS
-       select GENERIC_IRQ_SHOW
        select IRQ_PREFLOW_FASTEOI
 
 config ARCH_DEFCONFIG
index 31d48a0..a4c5a93 100644 (file)
@@ -16,6 +16,10 @@ typedef struct {
        unsigned long clock_tick;
        unsigned int multiplier;
        unsigned int counter;
+#ifdef CONFIG_SMP
+       unsigned int irq_resched_count;
+       unsigned int irq_call_count;
+#endif
        int prom_node;
        int mid;
        int next;
@@ -23,5 +27,6 @@ typedef struct {
 
 DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data);
 #define cpu_data(__cpu) per_cpu(__cpu_data, (__cpu))
+#define local_cpu_data() __get_cpu_var(__cpu_data)
 
 #endif /* _SPARC_CPUDATA_H */
index 86666f7..482c79e 100644 (file)
@@ -281,28 +281,27 @@ static inline void sun_fd_enable_dma(void)
        pdma_areasize = pdma_size;
 }
 
-/* Our low-level entry point in arch/sparc/kernel/entry.S */
-extern int sparc_floppy_request_irq(int irq, unsigned long flags,
-                                   irq_handler_t irq_handler);
+extern int sparc_floppy_request_irq(unsigned int irq,
+                                    irq_handler_t irq_handler);
 
 static int sun_fd_request_irq(void)
 {
        static int once = 0;
-       int error;
 
-       if(!once) {
+       if (!once) {
                once = 1;
-               error = sparc_floppy_request_irq(FLOPPY_IRQ,
-                                                IRQF_DISABLED,
-                                                floppy_interrupt);
-               return ((error == 0) ? 0 : -1);
-       } else return 0;
+               return sparc_floppy_request_irq(FLOPPY_IRQ, floppy_interrupt);
+       } else {
+               return 0;
+       }
 }
 
 static struct linux_prom_registers fd_regs[2];
 
 static int sun_floppy_init(void)
 {
+       struct platform_device *op;
+       struct device_node *dp;
        char state[128];
        phandle tnode, fd_node;
        int num_regs;
@@ -310,7 +309,6 @@ static int sun_floppy_init(void)
 
        use_virtual_dma = 1;
 
-       FLOPPY_IRQ = 11;
        /* Forget it if we aren't on a machine that could possibly
         * ever have a floppy drive.
         */
@@ -349,6 +347,26 @@ static int sun_floppy_init(void)
        sun_fdc = (struct sun_flpy_controller *)
            of_ioremap(&r, 0, fd_regs[0].reg_size, "floppy");
 
+       /* Look up irq in platform_device.
+        * We try "SUNW,fdtwo" and "fd"
+        */
+       for_each_node_by_name(dp, "SUNW,fdtwo") {
+               op = of_find_device_by_node(dp);
+               if (op)
+                       break;
+       }
+       if (!op) {
+               for_each_node_by_name(dp, "fd") {
+                       op = of_find_device_by_node(dp);
+                       if (op)
+                               break;
+               }
+       }
+       if (!op)
+               goto no_sun_fdc;
+
+       FLOPPY_IRQ = op->archdata.irqs[0];
+
        /* Last minute sanity check... */
        if(sun_fdc->status_82072 == 0xff) {
                sun_fdc = NULL;
index a34b299..f6902cf 100644 (file)
@@ -5,4 +5,17 @@
 #else
 #include <asm/io_32.h>
 #endif
+
+/*
+ * Defines used for both SPARC32 and SPARC64
+ */
+
+/* Big-endian versions of the memory read/write routines */
+#define readb_be(__addr)       __raw_readb(__addr)
+#define readw_be(__addr)       __raw_readw(__addr)
+#define readl_be(__addr)       __raw_readl(__addr)
+#define writeb_be(__b, __addr) __raw_writeb(__b, __addr)
+#define writel_be(__w, __addr) __raw_writel(__w, __addr)
+#define writew_be(__l, __addr) __raw_writew(__l, __addr)
+
 #endif
index eced3e3..2ae3aca 100644 (file)
@@ -6,7 +6,11 @@
 #ifndef _SPARC_IRQ_H
 #define _SPARC_IRQ_H
 
-#define NR_IRQS    16
+/* Allocated number of logical irq numbers.
+ * sun4d boxes (ss2000e) should be OK with ~32.
+ * Be on the safe side and make room for 64
+ */
+#define NR_IRQS    64
 
 #include <linux/interrupt.h>
 
index c04f96f..6bdaf1e 100644 (file)
 #define LEON_DIAGF_VALID       0x2000
 #define LEON_DIAGF_VALID_SHIFT 13
 
-/*
- *  Interrupt Sources
- *
- *  The interrupt source numbers directly map to the trap type and to
- *  the bits used in the Interrupt Clear, Interrupt Force, Interrupt Mask,
- *  and the Interrupt Pending Registers.
- */
-#define LEON_INTERRUPT_CORRECTABLE_MEMORY_ERROR        1
-#define LEON_INTERRUPT_UART_1_RX_TX            2
-#define LEON_INTERRUPT_UART_0_RX_TX            3
-#define LEON_INTERRUPT_EXTERNAL_0              4
-#define LEON_INTERRUPT_EXTERNAL_1              5
-#define LEON_INTERRUPT_EXTERNAL_2              6
-#define LEON_INTERRUPT_EXTERNAL_3              7
-#define LEON_INTERRUPT_TIMER1                  8
-#define LEON_INTERRUPT_TIMER2                  9
-#define LEON_INTERRUPT_EMPTY1                  10
-#define LEON_INTERRUPT_EMPTY2                  11
-#define LEON_INTERRUPT_OPEN_ETH                        12
-#define LEON_INTERRUPT_EMPTY4                  13
-#define LEON_INTERRUPT_EMPTY5                  14
-#define LEON_INTERRUPT_EMPTY6                  15
-
 /* irq masks */
 #define LEON_HARD_INT(x)       (1 << (x))      /* irq 0-15 */
 #define LEON_IRQMASK_R         0x0000fffe      /* bit 15- 1 of lregs.irqmask */
@@ -183,7 +160,6 @@ static inline void leon_srmmu_enabletlb(void)
 /* macro access for leon_readnobuffer_reg() */
 #define LEON_BYPASSCACHE_LOAD_VA(x) leon_readnobuffer_reg((unsigned long)(x))
 
-extern void sparc_leon_eirq_register(int eirq);
 extern void leon_init(void);
 extern void leon_switch_mm(void);
 extern void leon_init_IRQ(void);
@@ -239,8 +215,8 @@ static inline int sparc_leon3_cpuid(void)
 #endif /*!__ASSEMBLY__*/
 
 #ifdef CONFIG_SMP
-# define LEON3_IRQ_RESCHEDULE          13
-# define LEON3_IRQ_TICKER              (leon_percpu_timer_dev[0].irq)
+# define LEON3_IRQ_IPI_DEFAULT         13
+# define LEON3_IRQ_TICKER              (leon3_ticker_irq)
 # define LEON3_IRQ_CROSS_CALL          15
 #endif
 
@@ -339,9 +315,9 @@ struct leon2_cacheregs {
 #include <linux/interrupt.h>
 
 struct device_node;
-extern int sparc_leon_eirq_get(int eirq, int cpu);
-extern irqreturn_t sparc_leon_eirq_isr(int dummy, void *dev_id);
-extern void sparc_leon_eirq_register(int eirq);
+extern unsigned int leon_build_device_irq(unsigned int real_irq,
+                                          irq_flow_handler_t flow_handler,
+                                          const char *name, int do_ack);
 extern void leon_clear_clock_irq(void);
 extern void leon_load_profile_irq(int cpu, unsigned int limit);
 extern void leon_init_timers(irq_handler_t counter_fn);
@@ -358,6 +334,7 @@ extern void leon3_getCacheRegs(struct leon3_cacheregs *regs);
 extern int leon_flush_needed(void);
 extern void leon_switch_mm(void);
 extern int srmmu_swprobe_trace;
+extern int leon3_ticker_irq;
 
 #ifdef CONFIG_SMP
 extern int leon_smp_nrcpus(void);
@@ -366,17 +343,19 @@ extern void leon_smp_done(void);
 extern void leon_boot_cpus(void);
 extern int leon_boot_one_cpu(int i);
 void leon_init_smp(void);
-extern void cpu_probe(void);
 extern void cpu_idle(void);
 extern void init_IRQ(void);
 extern void cpu_panic(void);
 extern int __leon_processor_id(void);
 void leon_enable_irq_cpu(unsigned int irq_nr, unsigned int cpu);
+extern irqreturn_t leon_percpu_timer_interrupt(int irq, void *unused);
 
-extern unsigned int real_irq_entry[], smpleon_ticker[];
+extern unsigned int real_irq_entry[];
+extern unsigned int smpleon_ipi[];
 extern unsigned int patchme_maybe_smp_msg[];
 extern unsigned int t_nmi[], linux_trap_ipi15_leon[];
 extern unsigned int linux_trap_ipi15_sun4m[];
+extern int leon_ipi_irq;
 
 #endif /* CONFIG_SMP */
 
index f20ef56..7eb5d78 100644 (file)
@@ -29,11 +29,17 @@ struct linux_pcic {
        int                     pcic_imdim;
 };
 
-extern int pcic_probe(void);
-/* Erm... MJ redefined pcibios_present() so that it does not work early. */
+#ifdef CONFIG_PCI
 extern int pcic_present(void);
+extern int pcic_probe(void);
+extern void pci_time_init(void);
 extern void sun4m_pci_init_IRQ(void);
-
+#else
+static inline int pcic_present(void) { return 0; }
+static inline int pcic_probe(void) { return 0; }
+static inline void pci_time_init(void) {}
+static inline void sun4m_pci_init_IRQ(void) {}
+#endif
 #endif
 
 /* Size of PCI I/O space which we relocate. */
index 303bd4d..5b31a8e 100644 (file)
@@ -8,6 +8,8 @@
  *  Copyright (C) 1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
  */
 
+#include <linux/const.h>
+
 #ifndef __ASSEMBLY__
 #include <asm-generic/4level-fixup.h>
 
@@ -456,9 +458,9 @@ extern int io_remap_pfn_range(struct vm_area_struct *vma,
 
 #endif /* !(__ASSEMBLY__) */
 
-#define VMALLOC_START           0xfe600000
+#define VMALLOC_START           _AC(0xfe600000,UL)
 /* XXX Alter this when I get around to fixing sun4c - Anton */
-#define VMALLOC_END             0xffc00000
+#define VMALLOC_END             _AC(0xffc00000,UL)
 
 
 /* We provide our own get_unmapped_area to cope with VA holes for userland */
index f8dddb7..b77128c 100644 (file)
@@ -699,6 +699,9 @@ extern pmd_t swapper_low_pmd_dir[2048];
 extern void paging_init(void);
 extern unsigned long find_ecache_flush_span(unsigned long size);
 
+struct seq_file;
+extern void mmu_info(struct seq_file *);
+
 /* These do nothing with the way I have things setup. */
 #define mmu_lockarea(vaddr, len)               (vaddr)
 #define mmu_unlockarea(vaddr, len)             do { } while(0)
index 2643c62..64718ba 100644 (file)
 # define COMMAND_LINE_SIZE 256
 #endif
 
+#ifdef __KERNEL__
+
+#ifdef CONFIG_SPARC32
+/* The CPU that was used for booting
+ * Only sun4d + leon may have boot_cpu_id != 0
+ */
+extern unsigned char boot_cpu_id;
+extern unsigned char boot_cpu_id4;
+#endif
+
+#endif /* __KERNEL__ */
+
 #endif /* _SPARC_SETUP_H */
index d82d7f4..093f108 100644 (file)
@@ -50,42 +50,38 @@ void smp_callin(void);
 void smp_boot_cpus(void);
 void smp_store_cpu_info(int);
 
+void smp_resched_interrupt(void);
+void smp_call_function_single_interrupt(void);
+void smp_call_function_interrupt(void);
+
 struct seq_file;
 void smp_bogo(struct seq_file *);
 void smp_info(struct seq_file *);
 
 BTFIXUPDEF_CALL(void, smp_cross_call, smpfunc_t, cpumask_t, unsigned long, unsigned long, unsigned long, unsigned long)
 BTFIXUPDEF_CALL(int, __hard_smp_processor_id, void)
+BTFIXUPDEF_CALL(void, smp_ipi_resched, int);
+BTFIXUPDEF_CALL(void, smp_ipi_single, int);
+BTFIXUPDEF_CALL(void, smp_ipi_mask_one, int);
 BTFIXUPDEF_BLACKBOX(hard_smp_processor_id)
 BTFIXUPDEF_BLACKBOX(load_current)
 
 #define smp_cross_call(func,mask,arg1,arg2,arg3,arg4) BTFIXUP_CALL(smp_cross_call)(func,mask,arg1,arg2,arg3,arg4)
 
-static inline void xc0(smpfunc_t func) { smp_cross_call(func, cpu_online_map, 0, 0, 0, 0); }
+static inline void xc0(smpfunc_t func) { smp_cross_call(func, *cpu_online_mask, 0, 0, 0, 0); }
 static inline void xc1(smpfunc_t func, unsigned long arg1)
-{ smp_cross_call(func, cpu_online_map, arg1, 0, 0, 0); }
+{ smp_cross_call(func, *cpu_online_mask, arg1, 0, 0, 0); }
 static inline void xc2(smpfunc_t func, unsigned long arg1, unsigned long arg2)
-{ smp_cross_call(func, cpu_online_map, arg1, arg2, 0, 0); }
+{ smp_cross_call(func, *cpu_online_mask, arg1, arg2, 0, 0); }
 static inline void xc3(smpfunc_t func, unsigned long arg1, unsigned long arg2,
                           unsigned long arg3)
-{ smp_cross_call(func, cpu_online_map, arg1, arg2, arg3, 0); }
+{ smp_cross_call(func, *cpu_online_mask, arg1, arg2, arg3, 0); }
 static inline void xc4(smpfunc_t func, unsigned long arg1, unsigned long arg2,
                           unsigned long arg3, unsigned long arg4)
-{ smp_cross_call(func, cpu_online_map, arg1, arg2, arg3, arg4); }
-
-static inline int smp_call_function(void (*func)(void *info), void *info, int wait)
-{
-       xc1((smpfunc_t)func, (unsigned long)info);
-       return 0;
-}
+{ smp_cross_call(func, *cpu_online_mask, arg1, arg2, arg3, arg4); }
 
-static inline int smp_call_function_single(int cpuid, void (*func) (void *info),
-                                          void *info, int wait)
-{
-       smp_cross_call((smpfunc_t)func, cpumask_of_cpu(cpuid),
-                      (unsigned long) info, 0, 0, 0);
-       return 0;
-}
+extern void arch_send_call_function_single_ipi(int cpu);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
 
 static inline int cpu_logical_map(int cpu)
 {
@@ -135,6 +131,11 @@ static inline int hard_smp_processor_id(void)
                __asm__ __volatile__("lda [%g0] ASI_M_VIKING_TMP1, %0\n\t"
                                     "nop; nop" :
                                     "=&r" (cpuid));
+                    - leon
+               __asm__ __volatile__(   "rd %asr17, %0\n\t"
+                                       "srl %0, 0x1c, %0\n\t"
+                                       "nop\n\t" :
+                                       "=&r" (cpuid));
           See btfixup.h and btfixupprep.c to understand how a blackbox works.
         */
        __asm__ __volatile__("sethi %%hi(___b_hard_smp_processor_id), %0\n\t"
index f49e11c..20bca89 100644 (file)
@@ -49,6 +49,10 @@ extern void cpu_play_dead(void);
 
 extern void smp_fetch_global_regs(void);
 
+struct seq_file;
+void smp_bogo(struct seq_file *);
+void smp_info(struct seq_file *);
+
 #ifdef CONFIG_HOTPLUG_CPU
 extern int __cpu_disable(void);
 extern void __cpu_die(unsigned int cpu);
index 7f9b9db..5f5b8bf 100644 (file)
@@ -9,6 +9,7 @@
 #ifndef __ASSEMBLY__
 
 #include <asm/psr.h>
+#include <asm/processor.h> /* for cpu_relax */
 
 #define arch_spin_is_locked(lock) (*((volatile unsigned char *)(lock)) != 0)
 
index 890036b..47a7e86 100644 (file)
 
 #include <linux/irqflags.h>
 
-static inline unsigned int probe_irq_mask(unsigned long val)
-{
-       return 0;
-}
-
 /*
  * Sparc (general) CPU types
  */
index e3b65d8..3c96d3b 100644 (file)
@@ -29,10 +29,6 @@ enum sparc_cpu {
 /* This cannot ever be a sun4c :) That's just history. */
 #define ARCH_SUN4C 0
 
-extern const char *sparc_cpu_type;
-extern const char *sparc_fpu_type;
-extern const char *sparc_pmu_type;
-
 extern char reboot_command[];
 
 /* These are here in an effort to more fully work around Spitfire Errata
index 5b0a06d..a9be04b 100644 (file)
         st       %scratch, [%cur_reg + TI_W_SAVED];
 
 #ifdef CONFIG_SMP
+/* Results of LOAD_CURRENT() after BTFIXUP for SUN4M, SUN4D & LEON (comments) */
 #define LOAD_CURRENT4M(dest_reg, idreg) \
         rd       %tbr, %idreg; \
        sethi    %hi(current_set), %dest_reg; \
        or      %dest_reg, %lo(C_LABEL(current_set)), %dest_reg; \
        ld      [%idreg + %dest_reg], %dest_reg;
 
+#define LOAD_CURRENT_LEON(dest_reg, idreg)                     \
+       rd      %asr17, %idreg;                                 \
+       sethi   %hi(current_set), %dest_reg;                    \
+       srl     %idreg, 0x1c, %idreg;                           \
+       or      %dest_reg, %lo(current_set), %dest_reg;         \
+       sll     %idreg, 0x2, %idreg;                            \
+       ld      [%idreg + %dest_reg], %dest_reg;
+
 /* Blackbox - take care with this... - check smp4m and smp4d before changing this. */
 #define LOAD_CURRENT(dest_reg, idreg)                                  \
        sethi    %hi(___b_load_current), %idreg;                        \
index 99aa4db..9cff270 100644 (file)
@@ -71,10 +71,6 @@ obj-$(CONFIG_SPARC64)        += pcr.o
 obj-$(CONFIG_SPARC64)  += nmi.o
 obj-$(CONFIG_SPARC64_SMP) += cpumap.o
 
-# sparc32 do not use GENERIC_HARDIRQS but uses the generic devres implementation
-obj-$(CONFIG_SPARC32)     += devres.o
-devres-y                  := ../../../kernel/irq/devres.o
-
 obj-y                     += dma.o
 
 obj-$(CONFIG_SPARC32_PCI) += pcic.o
index 7925c54..138dbbc 100644 (file)
@@ -4,6 +4,7 @@
  * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
  */
 
+#include <linux/seq_file.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
@@ -11,7 +12,9 @@
 #include <linux/threads.h>
 
 #include <asm/spitfire.h>
+#include <asm/pgtable.h>
 #include <asm/oplib.h>
+#include <asm/setup.h>
 #include <asm/page.h>
 #include <asm/head.h>
 #include <asm/psr.h>
@@ -23,6 +26,9 @@
 DEFINE_PER_CPU(cpuinfo_sparc, __cpu_data) = { 0 };
 EXPORT_PER_CPU_SYMBOL(__cpu_data);
 
+int ncpus_probed;
+unsigned int fsr_storage;
+
 struct cpu_info {
        int psr_vers;
        const char *name;
@@ -247,13 +253,12 @@ static const struct manufacturer_info __initconst manufacturer_info[] = {
  * machine type value into consideration too.  I will fix this.
  */
 
-const char *sparc_cpu_type;
-const char *sparc_fpu_type;
+static const char *sparc_cpu_type;
+static const char *sparc_fpu_type;
 const char *sparc_pmu_type;
 
-unsigned int fsr_storage;
 
-static void set_cpu_and_fpu(int psr_impl, int psr_vers, int fpu_vers)
+static void __init set_cpu_and_fpu(int psr_impl, int psr_vers, int fpu_vers)
 {
        const struct manufacturer_info *manuf;
        int i;
@@ -313,7 +318,123 @@ static void set_cpu_and_fpu(int psr_impl, int psr_vers, int fpu_vers)
 }
 
 #ifdef CONFIG_SPARC32
-void __cpuinit cpu_probe(void)
+static int show_cpuinfo(struct seq_file *m, void *__unused)
+{
+       seq_printf(m,
+                  "cpu\t\t: %s\n"
+                  "fpu\t\t: %s\n"
+                  "promlib\t\t: Version %d Revision %d\n"
+                  "prom\t\t: %d.%d\n"
+                  "type\t\t: %s\n"
+                  "ncpus probed\t: %d\n"
+                  "ncpus active\t: %d\n"
+#ifndef CONFIG_SMP
+                  "CPU0Bogo\t: %lu.%02lu\n"
+                  "CPU0ClkTck\t: %ld\n"
+#endif
+                  ,
+                  sparc_cpu_type,
+                  sparc_fpu_type ,
+                  romvec->pv_romvers,
+                  prom_rev,
+                  romvec->pv_printrev >> 16,
+                  romvec->pv_printrev & 0xffff,
+                  &cputypval[0],
+                  ncpus_probed,
+                  num_online_cpus()
+#ifndef CONFIG_SMP
+                  , cpu_data(0).udelay_val/(500000/HZ),
+                  (cpu_data(0).udelay_val/(5000/HZ)) % 100,
+                  cpu_data(0).clock_tick
+#endif
+               );
+
+#ifdef CONFIG_SMP
+       smp_bogo(m);
+#endif
+       mmu_info(m);
+#ifdef CONFIG_SMP
+       smp_info(m);
+#endif
+       return 0;
+}
+#endif /* CONFIG_SPARC32 */
+
+#ifdef CONFIG_SPARC64
+unsigned int dcache_parity_tl1_occurred;
+unsigned int icache_parity_tl1_occurred;
+
+
+static int show_cpuinfo(struct seq_file *m, void *__unused)
+{
+       seq_printf(m,
+                  "cpu\t\t: %s\n"
+                  "fpu\t\t: %s\n"
+                  "pmu\t\t: %s\n"
+                  "prom\t\t: %s\n"
+                  "type\t\t: %s\n"
+                  "ncpus probed\t: %d\n"
+                  "ncpus active\t: %d\n"
+                  "D$ parity tl1\t: %u\n"
+                  "I$ parity tl1\t: %u\n"
+#ifndef CONFIG_SMP
+                  "Cpu0ClkTck\t: %016lx\n"
+#endif
+                  ,
+                  sparc_cpu_type,
+                  sparc_fpu_type,
+                  sparc_pmu_type,
+                  prom_version,
+                  ((tlb_type == hypervisor) ?
+                   "sun4v" :
+                   "sun4u"),
+                  ncpus_probed,
+                  num_online_cpus(),
+                  dcache_parity_tl1_occurred,
+                  icache_parity_tl1_occurred
+#ifndef CONFIG_SMP
+                  , cpu_data(0).clock_tick
+#endif
+               );
+#ifdef CONFIG_SMP
+       smp_bogo(m);
+#endif
+       mmu_info(m);
+#ifdef CONFIG_SMP
+       smp_info(m);
+#endif
+       return 0;
+}
+#endif /* CONFIG_SPARC64 */
+
+static void *c_start(struct seq_file *m, loff_t *pos)
+{
+       /* The pointer we are returning is arbitrary,
+        * it just has to be non-NULL and not IS_ERR
+        * in the success case.
+        */
+       return *pos == 0 ? &c_start : NULL;
+}
+
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+{
+       ++*pos;
+       return c_start(m, pos);
+}
+
+static void c_stop(struct seq_file *m, void *v)
+{
+}
+
+const struct seq_operations cpuinfo_op = {
+       .start =c_start,
+       .next = c_next,
+       .stop = c_stop,
+       .show = show_cpuinfo,
+};
+
+#ifdef CONFIG_SPARC32
+static int __init cpu_type_probe(void)
 {
        int psr_impl, psr_vers, fpu_vers;
        int psr;
@@ -332,8 +453,12 @@ void __cpuinit cpu_probe(void)
        put_psr(psr);
 
        set_cpu_and_fpu(psr_impl, psr_vers, fpu_vers);
+
+       return 0;
 }
-#else
+#endif /* CONFIG_SPARC32 */
+
+#ifdef CONFIG_SPARC64
 static void __init sun4v_cpu_probe(void)
 {
        switch (sun4v_chip_type) {
@@ -374,6 +499,6 @@ static int __init cpu_type_probe(void)
        }
        return 0;
 }
+#endif /* CONFIG_SPARC64 */
 
 early_initcall(cpu_type_probe);
-#endif
index 8de64c8..d91fd78 100644 (file)
@@ -202,7 +202,7 @@ static struct cpuinfo_tree *build_cpuinfo_tree(void)
        new_tree->total_nodes = n;
        memcpy(&new_tree->level, tmp_level, sizeof(tmp_level));
 
-       prev_cpu = cpu = first_cpu(cpu_online_map);
+       prev_cpu = cpu = cpumask_first(cpu_online_mask);
 
        /* Initialize all levels in the tree with the first CPU */
        for (level = CPUINFO_LVL_PROC; level >= CPUINFO_LVL_ROOT; level--) {
@@ -381,7 +381,7 @@ static int simple_map_to_cpu(unsigned int index)
        }
 
        /* Impossible, since num_online_cpus() <= num_possible_cpus() */
-       return first_cpu(cpu_online_map);
+       return cpumask_first(cpu_online_mask);
 }
 
 static int _map_to_cpu(unsigned int index)
index d2eddd6..113c052 100644 (file)
@@ -20,7 +20,6 @@
 #include <asm/system.h>
 #include <asm/cpudata.h>
 
-extern void cpu_probe(void);
 extern void clock_stop_probe(void); /* tadpole.c */
 extern void sun4c_probe_memerr_reg(void);
 
@@ -115,7 +114,7 @@ int cpu_get_hwmid(phandle prom_node)
 
 void __init device_scan(void)
 {
-       prom_printf("Booting Linux...\n");
+       printk(KERN_NOTICE "Booting Linux...\n");
 
 #ifndef CONFIG_SMP
        {
@@ -133,7 +132,6 @@ void __init device_scan(void)
        }
 #endif /* !CONFIG_SMP */
 
-       cpu_probe();
        {
                extern void auxio_probe(void);
                extern void auxio_power_probe(void);
index 3add4de..dd1342c 100644 (file)
@@ -497,7 +497,7 @@ static void dr_cpu_init_response(struct ds_data *resp, u64 req_num,
        tag->num_records = ncpus;
 
        i = 0;
-       for_each_cpu_mask(cpu, *mask) {
+       for_each_cpu(cpu, mask) {
                ent[i].cpu = cpu;
                ent[i].result = DR_CPU_RES_OK;
                ent[i].stat = default_stat;
@@ -534,7 +534,7 @@ static int __cpuinit dr_cpu_configure(struct ds_info *dp,
        int resp_len, ncpus, cpu;
        unsigned long flags;
 
-       ncpus = cpus_weight(*mask);
+       ncpus = cpumask_weight(mask);
        resp_len = dr_cpu_size_response(ncpus);
        resp = kzalloc(resp_len, GFP_KERNEL);
        if (!resp)
@@ -547,7 +547,7 @@ static int __cpuinit dr_cpu_configure(struct ds_info *dp,
        mdesc_populate_present_mask(mask);
        mdesc_fill_in_cpu_data(mask);
 
-       for_each_cpu_mask(cpu, *mask) {
+       for_each_cpu(cpu, mask) {
                int err;
 
                printk(KERN_INFO "ds-%llu: Starting cpu %d...\n",
@@ -593,7 +593,7 @@ static int dr_cpu_unconfigure(struct ds_info *dp,
        int resp_len, ncpus, cpu;
        unsigned long flags;
 
-       ncpus = cpus_weight(*mask);
+       ncpus = cpumask_weight(mask);
        resp_len = dr_cpu_size_response(ncpus);
        resp = kzalloc(resp_len, GFP_KERNEL);
        if (!resp)
@@ -603,7 +603,7 @@ static int dr_cpu_unconfigure(struct ds_info *dp,
                             resp_len, ncpus, mask,
                             DR_CPU_STAT_UNCONFIGURED);
 
-       for_each_cpu_mask(cpu, *mask) {
+       for_each_cpu(cpu, mask) {
                int err;
 
                printk(KERN_INFO "ds-%llu: Shutting down cpu %d...\n",
@@ -649,13 +649,13 @@ static void __cpuinit dr_cpu_data(struct ds_info *dp,
 
        purge_dups(cpu_list, tag->num_records);
 
-       cpus_clear(mask);
+       cpumask_clear(&mask);
        for (i = 0; i < tag->num_records; i++) {
                if (cpu_list[i] == CPU_SENTINEL)
                        continue;
 
                if (cpu_list[i] < nr_cpu_ids)
-                       cpu_set(cpu_list[i], mask);
+                       cpumask_set_cpu(cpu_list[i], &mask);
        }
 
        if (tag->type == DR_CPU_CONFIGURE)
index 6da784a..8341963 100644 (file)
@@ -269,19 +269,22 @@ smp4m_ticker:
        /* Here is where we check for possible SMP IPI passed to us
         * on some level other than 15 which is the NMI and only used
         * for cross calls.  That has a separate entry point below.
+        *
+        * IPIs are sent on Level 12, 13 and 14. See IRQ_IPI_*.
         */
 maybe_smp4m_msg:
        GET_PROCESSOR4M_ID(o3)
        sethi   %hi(sun4m_irq_percpu), %l5
        sll     %o3, 2, %o3
        or      %l5, %lo(sun4m_irq_percpu), %o5
-       sethi   %hi(0x40000000), %o2
+       sethi   %hi(0x70000000), %o2    ! Check all soft-IRQs
        ld      [%o5 + %o3], %o1
        ld      [%o1 + 0x00], %o3       ! sun4m_irq_percpu[cpu]->pending
        andcc   %o3, %o2, %g0
        be,a    smp4m_ticker
         cmp    %l7, 14
-       st      %o2, [%o1 + 0x04]       ! sun4m_irq_percpu[cpu]->clear=0x40000000
+       /* Soft-IRQ IPI */
+       st      %o2, [%o1 + 0x04]       ! sun4m_irq_percpu[cpu]->clear=0x70000000
        WRITE_PAUSE
        ld      [%o1 + 0x00], %g0       ! sun4m_irq_percpu[cpu]->pending
        WRITE_PAUSE
@@ -290,9 +293,27 @@ maybe_smp4m_msg:
        WRITE_PAUSE
        wr      %l4, PSR_ET, %psr
        WRITE_PAUSE
-       call    smp_reschedule_irq
+       sll     %o2, 28, %o2            ! shift for simpler checks below
+maybe_smp4m_msg_check_single:
+       andcc   %o2, 0x1, %g0
+       beq,a   maybe_smp4m_msg_check_mask
+        andcc  %o2, 0x2, %g0
+       call    smp_call_function_single_interrupt
         nop
-
+       andcc   %o2, 0x2, %g0
+maybe_smp4m_msg_check_mask:
+       beq,a   maybe_smp4m_msg_check_resched
+        andcc  %o2, 0x4, %g0
+       call    smp_call_function_interrupt
+        nop
+       andcc   %o2, 0x4, %g0
+maybe_smp4m_msg_check_resched:
+       /* rescheduling is done in RESTORE_ALL regardless, but increment the stats */
+       beq,a   maybe_smp4m_msg_out
+        nop
+       call    smp_resched_interrupt
+        nop
+maybe_smp4m_msg_out:
        RESTORE_ALL
 
        .align  4
@@ -401,18 +422,18 @@ linux_trap_ipi15_sun4d:
 1:     b,a     1b
 
 #ifdef CONFIG_SPARC_LEON
-
-       .globl  smpleon_ticker
-       /* SMP per-cpu ticker interrupts are handled specially. */
-smpleon_ticker:
+       .globl  smpleon_ipi
+       .extern leon_ipi_interrupt
+       /* SMP per-cpu IPI interrupts are handled specially. */
+smpleon_ipi:
         SAVE_ALL
        or      %l0, PSR_PIL, %g2
        wr      %g2, 0x0, %psr
        WRITE_PAUSE
        wr      %g2, PSR_ET, %psr
        WRITE_PAUSE
-       call    leon_percpu_timer_interrupt
-        add    %sp, STACKFRAME_SZ, %o0
+       call    leonsmp_ipi_interrupt
+        add    %sp, STACKFRAME_SZ, %o1 ! pt_regs
        wr      %l0, PSR_ET, %psr
        WRITE_PAUSE
        RESTORE_ALL
index 5942349..5877857 100644 (file)
@@ -810,31 +810,25 @@ found_version:
 got_prop:
 #ifdef CONFIG_SPARC_LEON
                /* no cpu-type check is needed, it is a SPARC-LEON */
-#ifdef CONFIG_SMP
-               ba leon_smp_init
-                nop
 
-               .global leon_smp_init
-leon_smp_init:
-               sethi   %hi(boot_cpu_id), %g1    ! master always 0
-               stb     %g0, [%g1 + %lo(boot_cpu_id)]
-               sethi   %hi(boot_cpu_id4), %g1   ! master always 0
-               stb     %g0, [%g1 + %lo(boot_cpu_id4)]
+               sethi   %hi(boot_cpu_id), %g2   ! boot-cpu index
 
-               rd     %asr17,%g1
-               srl    %g1,28,%g1
+#ifdef CONFIG_SMP
+               ldub    [%g2 + %lo(boot_cpu_id)], %g1
+               cmp     %g1, 0xff               ! unset means first CPU
+               bne     leon_smp_cpu_startup    ! continue only with master
+                nop
+#endif
+               /* Get CPU-ID from the most significant 4 bits of ASR17 */
+               rd     %asr17, %g1
+               srl    %g1, 28, %g1
 
-               cmp %g0,%g1
-                beq sun4c_continue_boot         !continue with master
-               nop
+               /* Update boot_cpu_id only on boot cpu */
+               stub    %g1, [%g2 + %lo(boot_cpu_id)]
 
-               ba leon_smp_cpu_startup
-                nop
-#else
                ba sun4c_continue_boot
                 nop
 #endif
-#endif
                set     cputypval, %o2
                ldub    [%o2 + 0x4], %l1
 
@@ -893,9 +887,6 @@ sun4d_init:
        sta     %g4, [%g0] ASI_M_VIKING_TMP1
        sethi   %hi(boot_cpu_id), %g5
        stb     %g4, [%g5 + %lo(boot_cpu_id)]
-       sll     %g4, 2, %g4
-       sethi   %hi(boot_cpu_id4), %g5
-       stb     %g4, [%g5 + %lo(boot_cpu_id4)]
 #endif
 
        /* Fall through to sun4m_init */
@@ -1024,14 +1015,28 @@ sun4c_continue_boot:
                bl      1b
                 add    %o0, 0x1, %o0
 
+               /* If boot_cpu_id has not been setup by machine specific
+                * init-code above we default it to zero.
+                */
+               sethi   %hi(boot_cpu_id), %g2
+               ldub    [%g2 + %lo(boot_cpu_id)], %g3
+               cmp     %g3, 0xff
+               bne     1f
+                nop
+               mov     %g0, %g3
+               stub    %g3, [%g2 + %lo(boot_cpu_id)]
+
+1:             /* boot_cpu_id set. calculate boot_cpu_id4 = boot_cpu_id*4 */
+               sll     %g3, 2, %g3
+               sethi   %hi(boot_cpu_id4), %g2
+               stub    %g3, [%g2 + %lo(boot_cpu_id4)]
+
                /* Initialize the uwinmask value for init task just in case.
                 * But first make current_set[boot_cpu_id] point to something useful.
                 */
                set     init_thread_union, %g6
                set     current_set, %g2
 #ifdef CONFIG_SMP
-               sethi   %hi(boot_cpu_id4), %g3
-               ldub    [%g3 + %lo(boot_cpu_id4)], %g3
                st      %g6, [%g2]
                add     %g2, %g3, %g2
 #endif
index c6ce9a6..1c9c80a 100644 (file)
 #include <asm/io-unit.h>
 #include <asm/leon.h>
 
+/* This function must make sure that caches and memory are coherent after DMA
+ * On LEON systems without cache snooping it flushes the entire D-CACHE.
+ */
 #ifndef CONFIG_SPARC_LEON
-#define mmu_inval_dma_area(p, l)       /* Anton pulled it out for 2.4.0-xx */
+static inline void dma_make_coherent(unsigned long pa, unsigned long len)
+{
+}
 #else
-static inline void mmu_inval_dma_area(void *va, unsigned long len)
+static inline void dma_make_coherent(unsigned long pa, unsigned long len)
 {
        if (!sparc_leon3_snooping_enabled())
                leon_flush_dcache_all();
@@ -284,7 +289,6 @@ static void *sbus_alloc_coherent(struct device *dev, size_t len,
                printk("sbus_alloc_consistent: cannot occupy 0x%lx", len_total);
                goto err_nova;
        }
-       mmu_inval_dma_area((void *)va, len_total);
 
        // XXX The mmu_map_dma_area does this for us below, see comments.
        // sparc_mapiorange(0, virt_to_phys(va), res->start, len_total);
@@ -336,7 +340,6 @@ static void sbus_free_coherent(struct device *dev, size_t n, void *p,
        release_resource(res);
        kfree(res);
 
-       /* mmu_inval_dma_area(va, n); */ /* it's consistent, isn't it */
        pgv = virt_to_page(p);
        mmu_unmap_dma_area(dev, ba, n);
 
@@ -463,7 +466,6 @@ static void *pci32_alloc_coherent(struct device *dev, size_t len,
                printk("pci_alloc_consistent: cannot occupy 0x%lx", len_total);
                goto err_nova;
        }
-       mmu_inval_dma_area(va, len_total);
        sparc_mapiorange(0, virt_to_phys(va), res->start, len_total);
 
        *pba = virt_to_phys(va); /* equals virt_to_bus (R.I.P.) for us. */
@@ -489,7 +491,6 @@ static void pci32_free_coherent(struct device *dev, size_t n, void *p,
                                dma_addr_t ba)
 {
        struct resource *res;
-       void *pgp;
 
        if ((res = _sparc_find_resource(&_sparc_dvma,
            (unsigned long)p)) == NULL) {
@@ -509,14 +510,12 @@ static void pci32_free_coherent(struct device *dev, size_t n, void *p,
                return;
        }
 
-       pgp = phys_to_virt(ba); /* bus_to_virt actually */
-       mmu_inval_dma_area(pgp, n);
+       dma_make_coherent(ba, n);
        sparc_unmapiorange((unsigned long)p, n);
 
        release_resource(res);
        kfree(res);
-
-       free_pages((unsigned long)pgp, get_order(n));
+       free_pages((unsigned long)phys_to_virt(ba), get_order(n));
 }
 
 /*
@@ -535,7 +534,7 @@ static void pci32_unmap_page(struct device *dev, dma_addr_t ba, size_t size,
                             enum dma_data_direction dir, struct dma_attrs *attrs)
 {
        if (dir != PCI_DMA_TODEVICE)
-               mmu_inval_dma_area(phys_to_virt(ba), PAGE_ALIGN(size));
+               dma_make_coherent(ba, PAGE_ALIGN(size));
 }
 
 /* Map a set of buffers described by scatterlist in streaming
@@ -562,8 +561,7 @@ static int pci32_map_sg(struct device *device, struct scatterlist *sgl,
 
        /* IIep is write-through, not flushing. */
        for_each_sg(sgl, sg, nents, n) {
-               BUG_ON(page_address(sg_page(sg)) == NULL);
-               sg->dma_address = virt_to_phys(sg_virt(sg));
+               sg->dma_address = sg_phys(sg);
                sg->dma_length = sg->length;
        }
        return nents;
@@ -582,9 +580,7 @@ static void pci32_unmap_sg(struct device *dev, struct scatterlist *sgl,
 
        if (dir != PCI_DMA_TODEVICE) {
                for_each_sg(sgl, sg, nents, n) {
-                       BUG_ON(page_address(sg_page(sg)) == NULL);
-                       mmu_inval_dma_area(page_address(sg_page(sg)),
-                                          PAGE_ALIGN(sg->length));
+                       dma_make_coherent(sg_phys(sg), PAGE_ALIGN(sg->length));
                }
        }
 }
@@ -603,8 +599,7 @@ static void pci32_sync_single_for_cpu(struct device *dev, dma_addr_t ba,
                                      size_t size, enum dma_data_direction dir)
 {
        if (dir != PCI_DMA_TODEVICE) {
-               mmu_inval_dma_area(phys_to_virt(ba),
-                                  PAGE_ALIGN(size));
+               dma_make_coherent(ba, PAGE_ALIGN(size));
        }
 }
 
@@ -612,8 +607,7 @@ static void pci32_sync_single_for_device(struct device *dev, dma_addr_t ba,
                                         size_t size, enum dma_data_direction dir)
 {
        if (dir != PCI_DMA_TODEVICE) {
-               mmu_inval_dma_area(phys_to_virt(ba),
-                                  PAGE_ALIGN(size));
+               dma_make_coherent(ba, PAGE_ALIGN(size));
        }
 }
 
@@ -631,9 +625,7 @@ static void pci32_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl,
 
        if (dir != PCI_DMA_TODEVICE) {
                for_each_sg(sgl, sg, nents, n) {
-                       BUG_ON(page_address(sg_page(sg)) == NULL);
-                       mmu_inval_dma_area(page_address(sg_page(sg)),
-                                          PAGE_ALIGN(sg->length));
+                       dma_make_coherent(sg_phys(sg), PAGE_ALIGN(sg->length));
                }
        }
 }
@@ -646,9 +638,7 @@ static void pci32_sync_sg_for_device(struct device *device, struct scatterlist *
 
        if (dir != PCI_DMA_TODEVICE) {
                for_each_sg(sgl, sg, nents, n) {
-                       BUG_ON(page_address(sg_page(sg)) == NULL);
-                       mmu_inval_dma_area(page_address(sg_page(sg)),
-                                          PAGE_ALIGN(sg->length));
+                       dma_make_coherent(sg_phys(sg), PAGE_ALIGN(sg->length));
                }
        }
 }
index 008453b..100b9c2 100644 (file)
@@ -2,6 +2,23 @@
 
 #include <asm/btfixup.h>
 
+struct irq_bucket {
+        struct irq_bucket *next;
+        unsigned int real_irq;
+        unsigned int irq;
+        unsigned int pil;
+};
+
+#define SUN4D_MAX_BOARD 10
+#define SUN4D_MAX_IRQ ((SUN4D_MAX_BOARD + 2) << 5)
+
+/* Map between the irq identifier used in hw to the
+ * irq_bucket. The map is sufficiently large to hold
+ * the sun4d hw identifiers.
+ */
+extern struct irq_bucket *irq_map[SUN4D_MAX_IRQ];
+
+
 /* sun4m specific type definitions */
 
 /* This maps direct to CPU specific interrupt registers */
@@ -35,6 +52,10 @@ struct sparc_irq_config {
 };
 extern struct sparc_irq_config sparc_irq_config;
 
+unsigned int irq_alloc(unsigned int real_irq, unsigned int pil);
+void irq_link(unsigned int irq);
+void irq_unlink(unsigned int irq);
+void handler_irq(unsigned int pil, struct pt_regs *regs);
 
 /* Dave Redman (djhr@tadpole.co.uk)
  * changed these to function pointers.. it saves cycles and will allow
@@ -44,33 +65,9 @@ extern struct sparc_irq_config sparc_irq_config;
  * Changed these to btfixup entities... It saves cycles :)
  */
 
-BTFIXUPDEF_CALL(void, disable_irq, unsigned int)
-BTFIXUPDEF_CALL(void, enable_irq, unsigned int)
-BTFIXUPDEF_CALL(void, disable_pil_irq, unsigned int)
-BTFIXUPDEF_CALL(void, enable_pil_irq, unsigned int)
 BTFIXUPDEF_CALL(void, clear_clock_irq, void)
 BTFIXUPDEF_CALL(void, load_profile_irq, int, unsigned int)
 
-static inline void __disable_irq(unsigned int irq)
-{
-       BTFIXUP_CALL(disable_irq)(irq);
-}
-
-static inline void __enable_irq(unsigned int irq)
-{
-       BTFIXUP_CALL(enable_irq)(irq);
-}
-
-static inline void disable_pil_irq(unsigned int irq)
-{
-       BTFIXUP_CALL(disable_pil_irq)(irq);
-}
-
-static inline void enable_pil_irq(unsigned int irq)
-{
-       BTFIXUP_CALL(enable_pil_irq)(irq);
-}
-
 static inline void clear_clock_irq(void)
 {
        BTFIXUP_CALL(clear_clock_irq)();
@@ -89,4 +86,10 @@ BTFIXUPDEF_CALL(void, set_irq_udt, int)
 #define set_cpu_int(cpu,level) BTFIXUP_CALL(set_cpu_int)(cpu,level)
 #define clear_cpu_int(cpu,level) BTFIXUP_CALL(clear_cpu_int)(cpu,level)
 #define set_irq_udt(cpu) BTFIXUP_CALL(set_irq_udt)(cpu)
+
+/* All SUN4D IPIs are sent on this IRQ, may be shared with hard IRQs */
+#define SUN4D_IPI_IRQ 14
+
+extern void sun4d_ipi_interrupt(void);
+
 #endif
index 7c93df4..9b89d84 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/seq_file.h>
 
 #include <asm/cacheflush.h>
+#include <asm/cpudata.h>
 #include <asm/pcic.h>
 #include <asm/leon.h>
 
@@ -101,284 +102,173 @@ EXPORT_SYMBOL(arch_local_irq_restore);
  * directed CPU interrupts using the existing enable/disable irq code
  * with tweaks.
  *
+ * Sun4d complicates things even further.  IRQ numbers are arbitrary
+ * 32-bit values in that case.  Since this is similar to sparc64,
+ * we adopt a virtual IRQ numbering scheme as is done there.
+ * Virtual interrupt numbers are allocated by build_irq().  So NR_IRQS
+ * just becomes a limit of how many interrupt sources we can handle in
+ * a single system.  Even fully loaded SS2000 machines top off at
+ * about 32 interrupt sources or so, therefore a NR_IRQS value of 64
+ * is more than enough.
+  *
+ * We keep a map of per-PIL enable interrupts.  These get wired
+ * up via the irq_chip->startup() method which gets invoked by
+ * the generic IRQ layer during request_irq().
  */
 
 
+/* Table of allocated irqs. Unused entries have irq == 0 */
+static struct irq_bucket irq_table[NR_IRQS];
+/* Protect access to irq_table */
+static DEFINE_SPINLOCK(irq_table_lock);
 
-/*
- * Dave Redman (djhr@tadpole.co.uk)
- *
- * There used to be extern calls and hard coded values here.. very sucky!
- * instead, because some of the devices attach very early, I do something
- * equally sucky but at least we'll never try to free statically allocated
- * space or call kmalloc before kmalloc_init :(.
- *
- * In fact it's the timer10 that attaches first.. then timer14
- * then kmalloc_init is called.. then the tty interrupts attach.
- * hmmm....
- *
- */
-#define MAX_STATIC_ALLOC       4
-struct irqaction static_irqaction[MAX_STATIC_ALLOC];
-int static_irq_count;
-
-static struct {
-       struct irqaction *action;
-       int flags;
-} sparc_irq[NR_IRQS];
-#define SPARC_IRQ_INPROGRESS 1
-
-/* Used to protect the IRQ action lists */
-DEFINE_SPINLOCK(irq_action_lock);
+/* Map between the irq identifier used in hw to the irq_bucket. */
+struct irq_bucket *irq_map[SUN4D_MAX_IRQ];
+/* Protect access to irq_map */
+static DEFINE_SPINLOCK(irq_map_lock);
 
-int show_interrupts(struct seq_file *p, void *v)
+/* Allocate a new irq from the irq_table */
+unsigned int irq_alloc(unsigned int real_irq, unsigned int pil)
 {
-       int i = *(loff_t *)v;
-       struct irqaction *action;
        unsigned long flags;
-#ifdef CONFIG_SMP
-       int j;
-#endif
+       unsigned int i;
+
+       spin_lock_irqsave(&irq_table_lock, flags);
+       for (i = 1; i < NR_IRQS; i++) {
+               if (irq_table[i].real_irq == real_irq && irq_table[i].pil == pil)
+                       goto found;
+       }
 
-       if (sparc_cpu_model == sun4d)
-               return show_sun4d_interrupts(p, v);
+       for (i = 1; i < NR_IRQS; i++) {
+               if (!irq_table[i].irq)
+                       break;
+       }
 
-       spin_lock_irqsave(&irq_action_lock, flags);
        if (i < NR_IRQS) {
-               action = sparc_irq[i].action;
-               if (!action)
-                       goto out_unlock;
-               seq_printf(p, "%3d: ", i);
-#ifndef CONFIG_SMP
-               seq_printf(p, "%10u ", kstat_irqs(i));
-#else
-               for_each_online_cpu(j) {
-                       seq_printf(p, "%10u ",
-                                   kstat_cpu(j).irqs[i]);
-               }
-#endif
-               seq_printf(p, " %c %s",
-                       (action->flags & IRQF_DISABLED) ? '+' : ' ',
-                       action->name);
-               for (action = action->next; action; action = action->next) {
-                       seq_printf(p, ",%s %s",
-                               (action->flags & IRQF_DISABLED) ? " +" : "",
-                               action->name);
-               }
-               seq_putc(p, '\n');
+               irq_table[i].real_irq = real_irq;
+               irq_table[i].irq = i;
+               irq_table[i].pil = pil;
+       } else {
+               printk(KERN_ERR "IRQ: Out of virtual IRQs.\n");
+               i = 0;
        }
-out_unlock:
-       spin_unlock_irqrestore(&irq_action_lock, flags);
-       return 0;
+found:
+       spin_unlock_irqrestore(&irq_table_lock, flags);
+
+       return i;
 }
 
-void free_irq(unsigned int irq, void *dev_id)
+/* Based on a single pil handler_irq may need to call several
+ * interrupt handlers. Use irq_map as entry to irq_table,
+ * and let each entry in irq_table point to the next entry.
+ */
+void irq_link(unsigned int irq)
 {
-       struct irqaction *action;
-       struct irqaction **actionp;
+       struct irq_bucket *p;
        unsigned long flags;
-       unsigned int cpu_irq;
-
-       if (sparc_cpu_model == sun4d) {
-               sun4d_free_irq(irq, dev_id);
-               return;
-       }
-       cpu_irq = irq & (NR_IRQS - 1);
-       if (cpu_irq > 14) {  /* 14 irq levels on the sparc */
-               printk(KERN_ERR "Trying to free bogus IRQ %d\n", irq);
-               return;
-       }
+       unsigned int pil;
 
-       spin_lock_irqsave(&irq_action_lock, flags);
+       BUG_ON(irq >= NR_IRQS);
 
-       actionp = &sparc_irq[cpu_irq].action;
-       action = *actionp;
+       spin_lock_irqsave(&irq_map_lock, flags);
 
-       if (!action->handler) {
-               printk(KERN_ERR "Trying to free free IRQ%d\n", irq);
-               goto out_unlock;
-       }
-       if (dev_id) {
-               for (; action; action = action->next) {
-                       if (action->dev_id == dev_id)
-                               break;
-                       actionp = &action->next;
-               }
-               if (!action) {
-                       printk(KERN_ERR "Trying to free free shared IRQ%d\n",
-                              irq);
-                       goto out_unlock;
-               }
-       } else if (action->flags & IRQF_SHARED) {
-               printk(KERN_ERR "Trying to free shared IRQ%d with NULL device ID\n",
-                      irq);
-               goto out_unlock;
-       }
-       if (action->flags & SA_STATIC_ALLOC) {
-               /*
-                * This interrupt is marked as specially allocated
-                * so it is a bad idea to free it.
-                */
-               printk(KERN_ERR "Attempt to free statically allocated IRQ%d (%s)\n",
-                      irq, action->name);
-               goto out_unlock;
-       }
-
-       *actionp = action->next;
+       p = &irq_table[irq];
+       pil = p->pil;
+       BUG_ON(pil >= SUN4D_MAX_IRQ);   /* irq_map[] has SUN4D_MAX_IRQ entries */
+       p->next = irq_map[pil];         /* push onto the head of this pil's chain */
+       irq_map[pil] = p;
 
-       spin_unlock_irqrestore(&irq_action_lock, flags);
+       spin_unlock_irqrestore(&irq_map_lock, flags);
+}
 
-       synchronize_irq(irq);
+void irq_unlink(unsigned int irq)
+{
+       struct irq_bucket *p, **pnext;
+       unsigned long flags;
 
-       spin_lock_irqsave(&irq_action_lock, flags);
+       BUG_ON(irq >= NR_IRQS);
 
-       kfree(action);
+       spin_lock_irqsave(&irq_map_lock, flags);
 
-       if (!sparc_irq[cpu_irq].action)
-               __disable_irq(irq);
+       p = &irq_table[irq];
+       BUG_ON(p->pil >= SUN4D_MAX_IRQ);        /* irq_map[] has SUN4D_MAX_IRQ entries */
+       pnext = &irq_map[p->pil];
+       while (*pnext != p)                     /* find the link that points at p */
+               pnext = &(*pnext)->next;
+       *pnext = p->next;                       /* splice p out of the chain */
 
-out_unlock:
-       spin_unlock_irqrestore(&irq_action_lock, flags);
+       spin_unlock_irqrestore(&irq_map_lock, flags);
 }
-EXPORT_SYMBOL(free_irq);
-
-/*
- * This is called when we want to synchronize with
- * interrupts. We may for example tell a device to
- * stop sending interrupts: but to make sure there
- * are no interrupts that are executing on another
- * CPU we need to call this function.
- */
-#ifdef CONFIG_SMP
-void synchronize_irq(unsigned int irq)
-{
-       unsigned int cpu_irq;
 
-       cpu_irq = irq & (NR_IRQS - 1);
-       while (sparc_irq[cpu_irq].flags & SPARC_IRQ_INPROGRESS)
-               cpu_relax();
-}
-EXPORT_SYMBOL(synchronize_irq);
-#endif /* SMP */
 
-void unexpected_irq(int irq, void *dev_id, struct pt_regs *regs)
+/* /proc/interrupts printing */
+int arch_show_interrupts(struct seq_file *p, int prec)
 {
-       int i;
-       struct irqaction *action;
-       unsigned int cpu_irq;
+       int j;
 
-       cpu_irq = irq & (NR_IRQS - 1);
-       action = sparc_irq[cpu_irq].action;
-
-       printk(KERN_ERR "IO device interrupt, irq = %d\n", irq);
-       printk(KERN_ERR "PC = %08lx NPC = %08lx FP=%08lx\n", regs->pc,
-                   regs->npc, regs->u_regs[14]);
-       if (action) {
-               printk(KERN_ERR "Expecting: ");
-               for (i = 0; i < 16; i++)
-                       if (action->handler)
-                               printk(KERN_CONT "[%s:%d:0x%x] ", action->name,
-                                      i, (unsigned int)action->handler);
-       }
-       printk(KERN_ERR "AIEEE\n");
-       panic("bogus interrupt received");
+#ifdef CONFIG_SMP
+       seq_printf(p, "RES: ");
+       for_each_online_cpu(j)
+               seq_printf(p, "%10u ", cpu_data(j).irq_resched_count);
+       seq_printf(p, "     IPI rescheduling interrupts\n");
+       seq_printf(p, "CAL: ");
+       for_each_online_cpu(j)
+               seq_printf(p, "%10u ", cpu_data(j).irq_call_count);
+       seq_printf(p, "     IPI function call interrupts\n");
+#endif
+       seq_printf(p, "NMI: ");
+       for_each_online_cpu(j)
+               seq_printf(p, "%10u ", cpu_data(j).counter);
+       seq_printf(p, "     Non-maskable interrupts\n");
+       return 0;
 }
 
-void handler_irq(int pil, struct pt_regs *regs)
+void handler_irq(unsigned int pil, struct pt_regs *regs)
 {
        struct pt_regs *old_regs;
-       struct irqaction *action;
-       int cpu = smp_processor_id();
+       struct irq_bucket *p;
 
+       BUG_ON(pil > 15);
        old_regs = set_irq_regs(regs);
        irq_enter();
-       disable_pil_irq(pil);
-#ifdef CONFIG_SMP
-       /* Only rotate on lower priority IRQs (scsi, ethernet, etc.). */
-       if ((sparc_cpu_model==sun4m) && (pil < 10))
-               smp4m_irq_rotate(cpu);
-#endif
-       action = sparc_irq[pil].action;
-       sparc_irq[pil].flags |= SPARC_IRQ_INPROGRESS;
-       kstat_cpu(cpu).irqs[pil]++;
-       do {
-               if (!action || !action->handler)
-                       unexpected_irq(pil, NULL, regs);
-               action->handler(pil, action->dev_id);
-               action = action->next;
-       } while (action);
-       sparc_irq[pil].flags &= ~SPARC_IRQ_INPROGRESS;
-       enable_pil_irq(pil);
+
+       p = irq_map[pil];
+       while (p) {
+               struct irq_bucket *next = p->next;
+
+               generic_handle_irq(p->irq);
+               p = next;
+       }
        irq_exit();
        set_irq_regs(old_regs);
 }
 
 #if defined(CONFIG_BLK_DEV_FD) || defined(CONFIG_BLK_DEV_FD_MODULE)
+static unsigned int floppy_irq;
 
-/*
- * Fast IRQs on the Sparc can only have one routine attached to them,
- * thus no sharing possible.
- */
-static int request_fast_irq(unsigned int irq,
-                           void (*handler)(void),
-                           unsigned long irqflags, const char *devname)
+int sparc_floppy_request_irq(unsigned int irq, irq_handler_t irq_handler)
 {
-       struct irqaction *action;
-       unsigned long flags;
        unsigned int cpu_irq;
-       int ret;
+       int err;
+
 #if defined CONFIG_SMP && !defined CONFIG_SPARC_LEON
        struct tt_entry *trap_table;
 #endif
-       cpu_irq = irq & (NR_IRQS - 1);
-       if (cpu_irq > 14) {
-               ret = -EINVAL;
-               goto out;
-       }
-       if (!handler) {
-               ret = -EINVAL;
-               goto out;
-       }
 
-       spin_lock_irqsave(&irq_action_lock, flags);
+       err = request_irq(irq, irq_handler, 0, "floppy", NULL);
+       if (err)
+               return -1;
 
-       action = sparc_irq[cpu_irq].action;
-       if (action) {
-               if (action->flags & IRQF_SHARED)
-                       panic("Trying to register fast irq when already shared.\n");
-               if (irqflags & IRQF_SHARED)
-                       panic("Trying to register fast irq as shared.\n");
+       /* Save for later use in floppy interrupt handler */
+       floppy_irq = irq;
 
-               /* Anyway, someone already owns it so cannot be made fast. */
-               printk(KERN_ERR "request_fast_irq: Trying to register yet already owned.\n");
-               ret = -EBUSY;
-               goto out_unlock;
-       }
-
-       /*
-        * If this is flagged as statically allocated then we use our
-        * private struct which is never freed.
-        */
-       if (irqflags & SA_STATIC_ALLOC) {
-               if (static_irq_count < MAX_STATIC_ALLOC)
-                       action = &static_irqaction[static_irq_count++];
-               else
-                       printk(KERN_ERR "Fast IRQ%d (%s) SA_STATIC_ALLOC failed using kmalloc\n",
-                              irq, devname);
-       }
-
-       if (action == NULL)
-               action = kmalloc(sizeof(struct irqaction), GFP_ATOMIC);
-       if (!action) {
-               ret = -ENOMEM;
-               goto out_unlock;
-       }
+       cpu_irq = (irq & (NR_IRQS - 1));
 
        /* Dork with trap table if we get this far. */
 #define INSTANTIATE(table) \
        table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_one = SPARC_RD_PSR_L0; \
        table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_two = \
-               SPARC_BRANCH((unsigned long) handler, \
+               SPARC_BRANCH((unsigned long) floppy_hardint, \
                             (unsigned long) &table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_two);\
        table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_three = SPARC_RD_WIM_L3; \
        table[SP_TRAP_IRQ1+(cpu_irq-1)].inst_four = SPARC_NOP;
@@ -399,22 +289,9 @@ static int request_fast_irq(unsigned int irq,
         * writing we have no CPU-neutral interface to fine-grained flushes.
         */
        flush_cache_all();
-
-       action->flags = irqflags;
-       action->name = devname;
-       action->dev_id = NULL;
-       action->next = NULL;
-
-       sparc_irq[cpu_irq].action = action;
-
-       __enable_irq(irq);
-
-       ret = 0;
-out_unlock:
-       spin_unlock_irqrestore(&irq_action_lock, flags);
-out:
-       return ret;
+       return 0;
 }
+EXPORT_SYMBOL(sparc_floppy_request_irq);
 
 /*
  * These variables are used to access state from the assembler
@@ -440,154 +317,23 @@ EXPORT_SYMBOL(pdma_base);
 unsigned long pdma_areasize;
 EXPORT_SYMBOL(pdma_areasize);
 
-static irq_handler_t floppy_irq_handler;
-
+/* Use the generic irq support to call floppy_interrupt
+ * which was setup using request_irq() in sparc_floppy_request_irq().
+ * We only have one floppy interrupt so we do not need to check
+ * for additional handlers being wired up by irq_link()
+ */
 void sparc_floppy_irq(int irq, void *dev_id, struct pt_regs *regs)
 {
        struct pt_regs *old_regs;
-       int cpu = smp_processor_id();
 
        old_regs = set_irq_regs(regs);
-       disable_pil_irq(irq);
        irq_enter();
-       kstat_cpu(cpu).irqs[irq]++;
-       floppy_irq_handler(irq, dev_id);
+       generic_handle_irq(floppy_irq);
        irq_exit();
-       enable_pil_irq(irq);
        set_irq_regs(old_regs);
-       /*
-        * XXX Eek, it's totally changed with preempt_count() and such
-        * if (softirq_pending(cpu))
-        *      do_softirq();
-        */
-}
-
-int sparc_floppy_request_irq(int irq, unsigned long flags,
-                            irq_handler_t irq_handler)
-{
-       floppy_irq_handler = irq_handler;
-       return request_fast_irq(irq, floppy_hardint, flags, "floppy");
 }
-EXPORT_SYMBOL(sparc_floppy_request_irq);
-
 #endif
 
-int request_irq(unsigned int irq,
-               irq_handler_t handler,
-               unsigned long irqflags, const char *devname, void *dev_id)
-{
-       struct irqaction *action, **actionp;
-       unsigned long flags;
-       unsigned int cpu_irq;
-       int ret;
-
-       if (sparc_cpu_model == sun4d)
-               return sun4d_request_irq(irq, handler, irqflags, devname, dev_id);
-
-       cpu_irq = irq & (NR_IRQS - 1);
-       if (cpu_irq > 14) {
-               ret = -EINVAL;
-               goto out;
-       }
-       if (!handler) {
-               ret = -EINVAL;
-               goto out;
-       }
-
-       spin_lock_irqsave(&irq_action_lock, flags);
-
-       actionp = &sparc_irq[cpu_irq].action;
-       action = *actionp;
-       if (action) {
-               if (!(action->flags & IRQF_SHARED) || !(irqflags & IRQF_SHARED)) {
-                       ret = -EBUSY;
-                       goto out_unlock;
-               }
-               if ((action->flags & IRQF_DISABLED) != (irqflags & IRQF_DISABLED)) {
-                       printk(KERN_ERR "Attempt to mix fast and slow interrupts on IRQ%d denied\n",
-                              irq);
-                       ret = -EBUSY;
-                       goto out_unlock;
-               }
-               for ( ; action; action = *actionp)
-                       actionp = &action->next;
-       }
-
-       /* If this is flagged as statically allocated then we use our
-        * private struct which is never freed.
-        */
-       if (irqflags & SA_STATIC_ALLOC) {
-               if (static_irq_count < MAX_STATIC_ALLOC)
-                       action = &static_irqaction[static_irq_count++];
-               else
-                       printk(KERN_ERR "Request for IRQ%d (%s) SA_STATIC_ALLOC failed using kmalloc\n",
-                              irq, devname);
-       }
-       if (action == NULL)
-               action = kmalloc(sizeof(struct irqaction), GFP_ATOMIC);
-       if (!action) {
-               ret = -ENOMEM;
-               goto out_unlock;
-       }
-
-       action->handler = handler;
-       action->flags = irqflags;
-       action->name = devname;
-       action->next = NULL;
-       action->dev_id = dev_id;
-
-       *actionp = action;
-
-       __enable_irq(irq);
-
-       ret = 0;
-out_unlock:
-       spin_unlock_irqrestore(&irq_action_lock, flags);
-out:
-       return ret;
-}
-EXPORT_SYMBOL(request_irq);
-
-void disable_irq_nosync(unsigned int irq)
-{
-       __disable_irq(irq);
-}
-EXPORT_SYMBOL(disable_irq_nosync);
-
-void disable_irq(unsigned int irq)
-{
-       __disable_irq(irq);
-}
-EXPORT_SYMBOL(disable_irq);
-
-void enable_irq(unsigned int irq)
-{
-       __enable_irq(irq);
-}
-EXPORT_SYMBOL(enable_irq);
-
-/*
- * We really don't need these at all on the Sparc.  We only have
- * stubs here because they are exported to modules.
- */
-unsigned long probe_irq_on(void)
-{
-       return 0;
-}
-EXPORT_SYMBOL(probe_irq_on);
-
-int probe_irq_off(unsigned long mask)
-{
-       return 0;
-}
-EXPORT_SYMBOL(probe_irq_off);
-
-static unsigned int build_device_irq(struct platform_device *op,
-                                     unsigned int real_irq)
-{
-       return real_irq;
-}
-
 /* djhr
  * This could probably be made indirect too and assigned in the CPU
  * bits of the code. That would be much nicer I think and would also
@@ -598,8 +344,6 @@ static unsigned int build_device_irq(struct platform_device *op,
 
 void __init init_IRQ(void)
 {
-       sparc_irq_config.build_device_irq = build_device_irq;
-
        switch (sparc_cpu_model) {
        case sun4c:
        case sun4:
@@ -607,14 +351,11 @@ void __init init_IRQ(void)
                break;
 
        case sun4m:
-#ifdef CONFIG_PCI
                pcic_probe();
-               if (pcic_present()) {
+               if (pcic_present())
                        sun4m_pci_init_IRQ();
-                       break;
-               }
-#endif
-               sun4m_init_IRQ();
+               else
+                       sun4m_init_IRQ();
                break;
 
        case sun4d:
@@ -632,9 +373,3 @@ void __init init_IRQ(void)
        btfixup();
 }
 
-#ifdef CONFIG_PROC_FS
-void init_irq_proc(void)
-{
-       /* For now, nothing... */
-}
-#endif /* CONFIG_PROC_FS */
index b1d275c..4e78862 100644 (file)
@@ -224,13 +224,13 @@ static int irq_choose_cpu(unsigned int irq, const struct cpumask *affinity)
        int cpuid;
 
        cpumask_copy(&mask, affinity);
-       if (cpus_equal(mask, cpu_online_map)) {
+       if (cpumask_equal(&mask, cpu_online_mask)) {
                cpuid = map_to_cpu(irq);
        } else {
                cpumask_t tmp;
 
-               cpus_and(tmp, cpu_online_map, mask);
-               cpuid = cpus_empty(tmp) ? map_to_cpu(irq) : first_cpu(tmp);
+               cpumask_and(&tmp, cpu_online_mask, &mask);
+               cpuid = cpumask_empty(&tmp) ? map_to_cpu(irq) : cpumask_first(&tmp);
        }
 
        return cpuid;
index 24ad449..6f6544c 100644 (file)
@@ -6,11 +6,9 @@
 #include <asm/traps.h>
 
 /* cpu.c */
-extern const char *sparc_cpu_type;
 extern const char *sparc_pmu_type;
-extern const char *sparc_fpu_type;
-
 extern unsigned int fsr_storage;
+extern int ncpus_probed;
 
 #ifdef CONFIG_SPARC32
 /* cpu.c */
@@ -37,6 +35,7 @@ extern void sun4c_init_IRQ(void);
 extern unsigned int lvl14_resolution;
 
 extern void sun4m_init_IRQ(void);
+extern void sun4m_unmask_profile_irq(void);
 extern void sun4m_clear_profile_irq(int cpu);
 
 /* sun4d_irq.c */
index 2969f77..2f538ac 100644 (file)
 #include <asm/leon_amba.h>
 #include <asm/traps.h>
 #include <asm/cacheflush.h>
+#include <asm/smp.h>
+#include <asm/setup.h>
 
 #include "prom.h"
 #include "irq.h"
 
 struct leon3_irqctrl_regs_map *leon3_irqctrl_regs; /* interrupt controller base address */
 struct leon3_gptimer_regs_map *leon3_gptimer_regs; /* timer controller base address */
-struct amba_apb_device leon_percpu_timer_dev[16];
 
 int leondebug_irq_disable;
 int leon_debug_irqout;
 static int dummy_master_l10_counter;
 unsigned long amba_system_id;
+static DEFINE_SPINLOCK(leon_irq_lock);
 
 unsigned long leon3_gptimer_irq; /* interrupt controller irq number */
 unsigned long leon3_gptimer_idx; /* Timer Index (0..6) within Timer Core */
+int leon3_ticker_irq; /* Timer ticker IRQ */
 unsigned int sparc_leon_eirq;
-#define LEON_IMASK ((&leon3_irqctrl_regs->mask[0]))
+#define LEON_IMASK(cpu) (&leon3_irqctrl_regs->mask[cpu])
+#define LEON_IACK (&leon3_irqctrl_regs->iclear)
+#define LEON_DO_ACK_HW 1
 
-/* Return the IRQ of the pending IRQ on the extended IRQ controller */
-int sparc_leon_eirq_get(int eirq, int cpu)
+/* Return the last ACKed IRQ by the Extended IRQ controller. It has already
+ * been (automatically) ACKed when the CPU takes the trap.
+ */
+static inline unsigned int leon_eirq_get(int cpu)
 {
        return LEON3_BYPASS_LOAD_PA(&leon3_irqctrl_regs->intid[cpu]) & 0x1f;
 }
 
-irqreturn_t sparc_leon_eirq_isr(int dummy, void *dev_id)
+/* Handle one or multiple IRQs from the extended interrupt controller */
+static void leon_handle_ext_irq(unsigned int irq, struct irq_desc *desc)
 {
-       printk(KERN_ERR "sparc_leon_eirq_isr: ERROR EXTENDED IRQ\n");
-       return IRQ_HANDLED;
+       unsigned int eirq;
+       int cpu = sparc_leon3_cpuid();
+
+       eirq = leon_eirq_get(cpu);
+       if ((eirq & 0x10) && irq_map[eirq]->irq) /* bit4 tells if IRQ happened */
+               generic_handle_irq(irq_map[eirq]->irq);
 }
 
 /* The extended IRQ controller has been found, this function registers it */
-void sparc_leon_eirq_register(int eirq)
+void leon_eirq_setup(unsigned int eirq)
 {
-       int irq;
+       unsigned long mask, oldmask;
+       unsigned int veirq;
 
-       /* Register a "BAD" handler for this interrupt, it should never happen */
-       irq = request_irq(eirq, sparc_leon_eirq_isr,
-                         (IRQF_DISABLED | SA_STATIC_ALLOC), "extirq", NULL);
-
-       if (irq) {
-               printk(KERN_ERR
-                      "sparc_leon_eirq_register: unable to attach IRQ%d\n",
-                      eirq);
-       } else {
-               sparc_leon_eirq = eirq;
+       if (eirq < 1 || eirq > 0xf) {
+               printk(KERN_ERR "LEON EXT IRQ NUMBER BAD: %d\n", eirq);
+               return;
        }
 
+       veirq = leon_build_device_irq(eirq, leon_handle_ext_irq, "extirq", 0);
+
+       /*
+        * Unmask the Extended IRQ, the IRQs routed through the Ext-IRQ
+        * controller have a mask-bit of their own, so this is safe.
+        */
+       irq_link(veirq);
+       mask = 1 << eirq;
+       oldmask = LEON3_BYPASS_LOAD_PA(LEON_IMASK(boot_cpu_id));
+       LEON3_BYPASS_STORE_PA(LEON_IMASK(boot_cpu_id), (oldmask | mask));
+       sparc_leon_eirq = eirq;
 }
 
 static inline unsigned long get_irqmask(unsigned int irq)
@@ -83,35 +100,151 @@ static inline unsigned long get_irqmask(unsigned int irq)
        return mask;
 }
 
-static void leon_enable_irq(unsigned int irq_nr)
+#ifdef CONFIG_SMP
+static int irq_choose_cpu(const struct cpumask *affinity)
 {
-       unsigned long mask, flags;
-       mask = get_irqmask(irq_nr);
-       local_irq_save(flags);
-       LEON3_BYPASS_STORE_PA(LEON_IMASK,
-                             (LEON3_BYPASS_LOAD_PA(LEON_IMASK) | (mask)));
-       local_irq_restore(flags);
+       cpumask_t mask;
+
+       cpus_and(mask, cpu_online_map, *affinity);
+       if (cpus_equal(mask, cpu_online_map) || cpus_empty(mask))
+               return boot_cpu_id;
+       else
+               return first_cpu(mask);
 }
+#else
+#define irq_choose_cpu(affinity) boot_cpu_id
+#endif
 
-static void leon_disable_irq(unsigned int irq_nr)
+static int leon_set_affinity(struct irq_data *data, const struct cpumask *dest,
+                            bool force)
 {
-       unsigned long mask, flags;
-       mask = get_irqmask(irq_nr);
-       local_irq_save(flags);
-       LEON3_BYPASS_STORE_PA(LEON_IMASK,
-                             (LEON3_BYPASS_LOAD_PA(LEON_IMASK) & ~(mask)));
-       local_irq_restore(flags);
+       unsigned long mask, oldmask, flags;
+       int oldcpu, newcpu;
+
+       mask = (unsigned long)data->chip_data;
+       oldcpu = irq_choose_cpu(data->affinity);
+       newcpu = irq_choose_cpu(dest);
+
+       if (oldcpu == newcpu)
+               goto out;
+
+       /* mask on old CPU first before enabling on the selected CPU */
+       spin_lock_irqsave(&leon_irq_lock, flags);
+       oldmask = LEON3_BYPASS_LOAD_PA(LEON_IMASK(oldcpu));
+       LEON3_BYPASS_STORE_PA(LEON_IMASK(oldcpu), (oldmask & ~mask));
+       oldmask = LEON3_BYPASS_LOAD_PA(LEON_IMASK(newcpu));
+       LEON3_BYPASS_STORE_PA(LEON_IMASK(newcpu), (oldmask | mask));
+       spin_unlock_irqrestore(&leon_irq_lock, flags);
+out:
+       return IRQ_SET_MASK_OK;
+}
+
+static void leon_unmask_irq(struct irq_data *data)
+{
+       unsigned long mask, oldmask, flags;
+       int cpu;
+
+       mask = (unsigned long)data->chip_data;
+       cpu = irq_choose_cpu(data->affinity);
+       spin_lock_irqsave(&leon_irq_lock, flags);
+       oldmask = LEON3_BYPASS_LOAD_PA(LEON_IMASK(cpu));
+       LEON3_BYPASS_STORE_PA(LEON_IMASK(cpu), (oldmask | mask));
+       spin_unlock_irqrestore(&leon_irq_lock, flags);
+}
+
+static void leon_mask_irq(struct irq_data *data)
+{
+       unsigned long mask, oldmask, flags;
+       int cpu;
+
+       mask = (unsigned long)data->chip_data;
+       cpu = irq_choose_cpu(data->affinity);
+       spin_lock_irqsave(&leon_irq_lock, flags);
+       oldmask = LEON3_BYPASS_LOAD_PA(LEON_IMASK(cpu));
+       LEON3_BYPASS_STORE_PA(LEON_IMASK(cpu), (oldmask & ~mask));
+       spin_unlock_irqrestore(&leon_irq_lock, flags);
+}
+
+static unsigned int leon_startup_irq(struct irq_data *data)
+{
+       irq_link(data->irq);
+       leon_unmask_irq(data);
+       return 0;
+}
 
+static void leon_shutdown_irq(struct irq_data *data)
+{
+       leon_mask_irq(data);
+       irq_unlink(data->irq);
+}
+
+/* Used by external level sensitive IRQ handlers on the LEON: ACK IRQ ctrl */
+static void leon_eoi_irq(struct irq_data *data)
+{
+       unsigned long mask = (unsigned long)data->chip_data;
+
+       if (mask & LEON_DO_ACK_HW)
+               LEON3_BYPASS_STORE_PA(LEON_IACK, mask & ~LEON_DO_ACK_HW);
+}
+
+static struct irq_chip leon_irq = {
+       .name                   = "leon",
+       .irq_startup            = leon_startup_irq,
+       .irq_shutdown           = leon_shutdown_irq,
+       .irq_mask               = leon_mask_irq,
+       .irq_unmask             = leon_unmask_irq,
+       .irq_eoi                = leon_eoi_irq,
+       .irq_set_affinity       = leon_set_affinity,
+};
+
+/*
+ * Build a LEON IRQ for the edge triggered LEON IRQ controller:
+ *  Edge (normal) IRQ           - handle_simple_irq, ack=DONT-CARE, never ack
+ *  Level IRQ (PCI|Level-GPIO)  - handle_fasteoi_irq, ack=1, ack after ISR
+ *  Per-CPU Edge                - handle_percpu_irq, ack=0
+ */
+unsigned int leon_build_device_irq(unsigned int real_irq,
+                                   irq_flow_handler_t flow_handler,
+                                   const char *name, int do_ack)
+{
+       unsigned int irq;
+       unsigned long mask;
+
+       irq = 0;
+       mask = get_irqmask(real_irq);
+       if (mask == 0)
+               goto out;
+
+       irq = irq_alloc(real_irq, real_irq);
+       if (irq == 0)
+               goto out;
+
+       if (do_ack)
+               mask |= LEON_DO_ACK_HW;
+
+       irq_set_chip_and_handler_name(irq, &leon_irq,
+                                     flow_handler, name);
+       irq_set_chip_data(irq, (void *)mask);
+
+out:
+       return irq;
+}
+
+static unsigned int _leon_build_device_irq(struct platform_device *op,
+                                          unsigned int real_irq)
+{
+       return leon_build_device_irq(real_irq, handle_simple_irq, "edge", 0);
 }
 
 void __init leon_init_timers(irq_handler_t counter_fn)
 {
-       int irq;
+       int irq, eirq;
        struct device_node *rootnp, *np, *nnp;
        struct property *pp;
        int len;
-       int cpu, icsel;
+       int icsel;
        int ampopts;
+       int err;
 
        leondebug_irq_disable = 0;
        leon_debug_irqout = 0;
@@ -173,98 +306,85 @@ void __init leon_init_timers(irq_handler_t counter_fn)
                        leon3_gptimer_irq = *(unsigned int *)pp->value;
        } while (0);
 
-       if (leon3_gptimer_regs && leon3_irqctrl_regs && leon3_gptimer_irq) {
-               LEON3_BYPASS_STORE_PA(
-                       &leon3_gptimer_regs->e[leon3_gptimer_idx].val, 0);
-               LEON3_BYPASS_STORE_PA(
-                       &leon3_gptimer_regs->e[leon3_gptimer_idx].rld,
-                       (((1000000 / HZ) - 1)));
-               LEON3_BYPASS_STORE_PA(
+       if (!(leon3_gptimer_regs && leon3_irqctrl_regs && leon3_gptimer_irq))
+               goto bad;
+
+       LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].val, 0);
+       LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].rld,
+                               (((1000000 / HZ) - 1)));
+       LEON3_BYPASS_STORE_PA(
                        &leon3_gptimer_regs->e[leon3_gptimer_idx].ctrl, 0);
 
 #ifdef CONFIG_SMP
-               leon_percpu_timer_dev[0].start = (int)leon3_gptimer_regs;
-               leon_percpu_timer_dev[0].irq = leon3_gptimer_irq + 1 +
-                                              leon3_gptimer_idx;
-
-               if (!(LEON3_BYPASS_LOAD_PA(&leon3_gptimer_regs->config) &
-                     (1<<LEON3_GPTIMER_SEPIRQ))) {
-                       prom_printf("irq timer not configured with separate irqs\n");
-                       BUG();
-               }
+       leon3_ticker_irq = leon3_gptimer_irq + 1 + leon3_gptimer_idx;
 
-               LEON3_BYPASS_STORE_PA(
-                       &leon3_gptimer_regs->e[leon3_gptimer_idx+1].val, 0);
-               LEON3_BYPASS_STORE_PA(
-                       &leon3_gptimer_regs->e[leon3_gptimer_idx+1].rld,
-                       (((1000000/HZ) - 1)));
-               LEON3_BYPASS_STORE_PA(
-                       &leon3_gptimer_regs->e[leon3_gptimer_idx+1].ctrl, 0);
-# endif
-
-               /*
-                * The IRQ controller may (if implemented) consist of multiple
-                * IRQ controllers, each mapped on a 4Kb boundary.
-                * Each CPU may be routed to different IRQCTRLs, however
-                * we assume that all CPUs (in SMP system) is routed to the
-                * same IRQ Controller, and for non-SMP only one IRQCTRL is
-                * accessed anyway.
-                * In AMP systems, Linux must run on CPU0 for the time being.
-                */
-               cpu = sparc_leon3_cpuid();
-               icsel = LEON3_BYPASS_LOAD_PA(&leon3_irqctrl_regs->icsel[cpu/8]);
-               icsel = (icsel >> ((7 - (cpu&0x7)) * 4)) & 0xf;
-               leon3_irqctrl_regs += icsel;
-       } else {
-               goto bad;
+       if (!(LEON3_BYPASS_LOAD_PA(&leon3_gptimer_regs->config) &
+             (1<<LEON3_GPTIMER_SEPIRQ))) {
+               printk(KERN_ERR "timer not configured with separate irqs\n");
+               BUG();
        }
 
-       irq = request_irq(leon3_gptimer_irq+leon3_gptimer_idx,
-                         counter_fn,
-                         (IRQF_DISABLED | SA_STATIC_ALLOC), "timer", NULL);
+       LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx+1].val,
+                               0);
+       LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx+1].rld,
+                               (((1000000/HZ) - 1)));
+       LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx+1].ctrl,
+                               0);
+#endif
 
-       if (irq) {
-               printk(KERN_ERR "leon_time_init: unable to attach IRQ%d\n",
-                      LEON_INTERRUPT_TIMER1);
+       /*
+        * The IRQ controller may (if implemented) consist of multiple
+        * IRQ controllers, each mapped on a 4Kb boundary.
+        * Each CPU may be routed to different IRQCTRLs, however
+        * we assume that all CPUs (in SMP system) are routed to the
+        * same IRQ Controller, and for non-SMP only one IRQCTRL is
+        * accessed anyway.
+        * In AMP systems, Linux must run on CPU0 for the time being.
+        */
+       icsel = LEON3_BYPASS_LOAD_PA(&leon3_irqctrl_regs->icsel[boot_cpu_id/8]);
+       icsel = (icsel >> ((7 - (boot_cpu_id&0x7)) * 4)) & 0xf;
+       leon3_irqctrl_regs += icsel;
+
+       /* Mask all IRQs on boot-cpu IRQ controller */
+       LEON3_BYPASS_STORE_PA(&leon3_irqctrl_regs->mask[boot_cpu_id], 0);
+
+       /* Probe extended IRQ controller */
+       eirq = (LEON3_BYPASS_LOAD_PA(&leon3_irqctrl_regs->mpstatus)
+               >> 16) & 0xf;
+       if (eirq != 0)
+               leon_eirq_setup(eirq);
+
+       irq = _leon_build_device_irq(NULL, leon3_gptimer_irq+leon3_gptimer_idx);
+       err = request_irq(irq, counter_fn, IRQF_TIMER, "timer", NULL);
+       if (err) {
+               printk(KERN_ERR "unable to attach timer IRQ%d\n", irq);
                prom_halt();
        }
 
-# ifdef CONFIG_SMP
-       {
-               unsigned long flags;
-               struct tt_entry *trap_table = &sparc_ttable[SP_TRAP_IRQ1 + (leon_percpu_timer_dev[0].irq - 1)];
-
-               /* For SMP we use the level 14 ticker, however the bootup code
-                * has copied the firmwares level 14 vector into boot cpu's
-                * trap table, we must fix this now or we get squashed.
-                */
-               local_irq_save(flags);
-
-               patchme_maybe_smp_msg[0] = 0x01000000; /* NOP out the branch */
-
-               /* Adjust so that we jump directly to smpleon_ticker */
-               trap_table->inst_three += smpleon_ticker - real_irq_entry;
+       LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].ctrl,
+                             LEON3_GPTIMER_EN |
+                             LEON3_GPTIMER_RL |
+                             LEON3_GPTIMER_LD |
+                             LEON3_GPTIMER_IRQEN);
 
-               local_flush_cache_all();
-               local_irq_restore(flags);
+#ifdef CONFIG_SMP
+       /* Install per-cpu IRQ handler for broadcasted ticker */
+       irq = leon_build_device_irq(leon3_ticker_irq, handle_percpu_irq,
+                                   "per-cpu", 0);
+       err = request_irq(irq, leon_percpu_timer_interrupt,
+                         IRQF_PERCPU | IRQF_TIMER, "ticker",
+                         NULL);
+       if (err) {
+               printk(KERN_ERR "unable to attach ticker IRQ%d\n", irq);
+               prom_halt();
        }
-# endif
-
-       if (leon3_gptimer_regs) {
-               LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx].ctrl,
-                                     LEON3_GPTIMER_EN |
-                                     LEON3_GPTIMER_RL |
-                                     LEON3_GPTIMER_LD | LEON3_GPTIMER_IRQEN);
 
-#ifdef CONFIG_SMP
-               LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx+1].ctrl,
-                                     LEON3_GPTIMER_EN |
-                                     LEON3_GPTIMER_RL |
-                                     LEON3_GPTIMER_LD |
-                                     LEON3_GPTIMER_IRQEN);
+       LEON3_BYPASS_STORE_PA(&leon3_gptimer_regs->e[leon3_gptimer_idx+1].ctrl,
+                             LEON3_GPTIMER_EN |
+                             LEON3_GPTIMER_RL |
+                             LEON3_GPTIMER_LD |
+                             LEON3_GPTIMER_IRQEN);
 #endif
-
-       }
        return;
 bad:
        printk(KERN_ERR "No Timer/irqctrl found\n");
@@ -281,9 +401,6 @@ void leon_load_profile_irq(int cpu, unsigned int limit)
        BUG();
 }
 
-
-
-
 void __init leon_trans_init(struct device_node *dp)
 {
        if (strcmp(dp->type, "cpu") == 0 && strcmp(dp->name, "<NULL>") == 0) {
@@ -337,22 +454,18 @@ void leon_enable_irq_cpu(unsigned int irq_nr, unsigned int cpu)
 {
        unsigned long mask, flags, *addr;
        mask = get_irqmask(irq_nr);
-       local_irq_save(flags);
-       addr = (unsigned long *)&(leon3_irqctrl_regs->mask[cpu]);
-       LEON3_BYPASS_STORE_PA(addr, (LEON3_BYPASS_LOAD_PA(addr) | (mask)));
-       local_irq_restore(flags);
+       spin_lock_irqsave(&leon_irq_lock, flags);
+       addr = (unsigned long *)LEON_IMASK(cpu);
+       LEON3_BYPASS_STORE_PA(addr, (LEON3_BYPASS_LOAD_PA(addr) | mask));
+       spin_unlock_irqrestore(&leon_irq_lock, flags);
 }
 
 #endif
 
 void __init leon_init_IRQ(void)
 {
-       sparc_irq_config.init_timers = leon_init_timers;
-
-       BTFIXUPSET_CALL(enable_irq, leon_enable_irq, BTFIXUPCALL_NORM);
-       BTFIXUPSET_CALL(disable_irq, leon_disable_irq, BTFIXUPCALL_NORM);
-       BTFIXUPSET_CALL(enable_pil_irq, leon_enable_irq, BTFIXUPCALL_NORM);
-       BTFIXUPSET_CALL(disable_pil_irq, leon_disable_irq, BTFIXUPCALL_NORM);
+       sparc_irq_config.init_timers      = leon_init_timers;
+       sparc_irq_config.build_device_irq = _leon_build_device_irq;
 
        BTFIXUPSET_CALL(clear_clock_irq, leon_clear_clock_irq,
                        BTFIXUPCALL_NORM);
index 8f5de4a..fe8fb44 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/smp.h>
 #include <linux/interrupt.h>
 #include <linux/kernel_stat.h>
+#include <linux/of.h>
 #include <linux/init.h>
 #include <linux/spinlock.h>
 #include <linux/mm.h>
@@ -29,6 +30,7 @@
 #include <asm/ptrace.h>
 #include <asm/atomic.h>
 #include <asm/irq_regs.h>
+#include <asm/traps.h>
 
 #include <asm/delay.h>
 #include <asm/irq.h>
 extern ctxd_t *srmmu_ctx_table_phys;
 static int smp_processors_ready;
 extern volatile unsigned long cpu_callin_map[NR_CPUS];
-extern unsigned char boot_cpu_id;
 extern cpumask_t smp_commenced_mask;
 void __init leon_configure_cache_smp(void);
+static void leon_ipi_init(void);
+
+/* IRQ number of LEON IPIs */
+int leon_ipi_irq = LEON3_IRQ_IPI_DEFAULT;
 
 static inline unsigned long do_swap(volatile unsigned long *ptr,
                                    unsigned long val)
@@ -94,8 +99,6 @@ void __cpuinit leon_callin(void)
        local_flush_cache_all();
        local_flush_tlb_all();
 
-       cpu_probe();
-
        /* Fix idle thread fields. */
        __asm__ __volatile__("ld [%0], %%g6\n\t" : : "r"(&current_set[cpuid])
                             : "memory" /* paranoid */);
@@ -104,11 +107,11 @@ void __cpuinit leon_callin(void)
        atomic_inc(&init_mm.mm_count);
        current->active_mm = &init_mm;
 
-       while (!cpu_isset(cpuid, smp_commenced_mask))
+       while (!cpumask_test_cpu(cpuid, &smp_commenced_mask))
                mb();
 
        local_irq_enable();
-       cpu_set(cpuid, cpu_online_map);
+       set_cpu_online(cpuid, true);
 }
 
 /*
@@ -179,13 +182,16 @@ void __init leon_boot_cpus(void)
        int nrcpu = leon_smp_nrcpus();
        int me = smp_processor_id();
 
+       /* Setup IPI */
+       leon_ipi_init();
+
        printk(KERN_INFO "%d:(%d:%d) cpus mpirq at 0x%x\n", (unsigned int)me,
               (unsigned int)nrcpu, (unsigned int)NR_CPUS,
               (unsigned int)&(leon3_irqctrl_regs->mpstatus));
 
        leon_enable_irq_cpu(LEON3_IRQ_CROSS_CALL, me);
        leon_enable_irq_cpu(LEON3_IRQ_TICKER, me);
-       leon_enable_irq_cpu(LEON3_IRQ_RESCHEDULE, me);
+       leon_enable_irq_cpu(leon_ipi_irq, me);
 
        leon_smp_setbroadcast(1 << LEON3_IRQ_TICKER);
 
@@ -220,6 +226,10 @@ int __cpuinit leon_boot_one_cpu(int i)
               (unsigned int)&leon3_irqctrl_regs->mpstatus);
        local_flush_cache_all();
 
+       /* Make sure all IRQs are off from the start for this new CPU */
+       LEON_BYPASS_STORE_PA(&leon3_irqctrl_regs->mask[i], 0);
+
+       /* Wake one CPU */
        LEON_BYPASS_STORE_PA(&(leon3_irqctrl_regs->mpstatus), 1 << i);
 
        /* wheee... it's going... */
@@ -236,7 +246,7 @@ int __cpuinit leon_boot_one_cpu(int i)
        } else {
                leon_enable_irq_cpu(LEON3_IRQ_CROSS_CALL, i);
                leon_enable_irq_cpu(LEON3_IRQ_TICKER, i);
-               leon_enable_irq_cpu(LEON3_IRQ_RESCHEDULE, i);
+               leon_enable_irq_cpu(leon_ipi_irq, i);
        }
 
        local_flush_cache_all();
@@ -262,21 +272,21 @@ void __init leon_smp_done(void)
        local_flush_cache_all();
 
        /* Free unneeded trap tables */
-       if (!cpu_isset(1, cpu_present_map)) {
+       if (!cpu_present(1)) {
                ClearPageReserved(virt_to_page(&trapbase_cpu1));
                init_page_count(virt_to_page(&trapbase_cpu1));
                free_page((unsigned long)&trapbase_cpu1);
                totalram_pages++;
                num_physpages++;
        }
-       if (!cpu_isset(2, cpu_present_map)) {
+       if (!cpu_present(2)) {
                ClearPageReserved(virt_to_page(&trapbase_cpu2));
                init_page_count(virt_to_page(&trapbase_cpu2));
                free_page((unsigned long)&trapbase_cpu2);
                totalram_pages++;
                num_physpages++;
        }
-       if (!cpu_isset(3, cpu_present_map)) {
+       if (!cpu_present(3)) {
                ClearPageReserved(virt_to_page(&trapbase_cpu3));
                init_page_count(virt_to_page(&trapbase_cpu3));
                free_page((unsigned long)&trapbase_cpu3);
@@ -292,6 +302,99 @@ void leon_irq_rotate(int cpu)
 {
 }
 
+struct leon_ipi_work {
+       int single;
+       int msk;
+       int resched;
+};
+
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct leon_ipi_work, leon_ipi_work);
+
+/* Initialize IPIs on the LEON, in order to save IRQ resources only one IRQ
+ * is used for all three types of IPIs.
+ */
+static void __init leon_ipi_init(void)
+{
+       int cpu, len;
+       struct leon_ipi_work *work;
+       struct property *pp;
+       struct device_node *rootnp;
+       struct tt_entry *trap_table;
+       unsigned long flags;
+
+       /* Find IPI IRQ or stick with default value */
+       rootnp = of_find_node_by_path("/ambapp0");
+       if (rootnp) {
+               pp = of_find_property(rootnp, "ipi_num", &len);
+               if (pp && (*(int *)pp->value))
+                       leon_ipi_irq = *(int *)pp->value;
+       }
+       printk(KERN_INFO "leon: SMP IPIs at IRQ %d\n", leon_ipi_irq);
+
+       /* Adjust so that we jump directly to smpleon_ipi */
+       local_irq_save(flags);
+       trap_table = &sparc_ttable[SP_TRAP_IRQ1 + (leon_ipi_irq - 1)];
+       trap_table->inst_three += smpleon_ipi - real_irq_entry;
+       local_flush_cache_all();
+       local_irq_restore(flags);
+
+       for_each_possible_cpu(cpu) {
+               work = &per_cpu(leon_ipi_work, cpu);
+               work->single = work->msk = work->resched = 0;
+       }
+}
+
+static void leon_ipi_single(int cpu)
+{
+       struct leon_ipi_work *work = &per_cpu(leon_ipi_work, cpu);
+
+       /* Mark work */
+       work->single = 1;
+
+       /* Generate IRQ on the CPU */
+       set_cpu_int(cpu, leon_ipi_irq);
+}
+
+static void leon_ipi_mask_one(int cpu)
+{
+       struct leon_ipi_work *work = &per_cpu(leon_ipi_work, cpu);
+
+       /* Mark work */
+       work->msk = 1;
+
+       /* Generate IRQ on the CPU */
+       set_cpu_int(cpu, leon_ipi_irq);
+}
+
+static void leon_ipi_resched(int cpu)
+{
+       struct leon_ipi_work *work = &per_cpu(leon_ipi_work, cpu);
+
+       /* Mark work */
+       work->resched = 1;
+
+       /* Generate IRQ on the CPU (any IRQ will cause resched) */
+       set_cpu_int(cpu, leon_ipi_irq);
+}
+
+void leonsmp_ipi_interrupt(void)
+{
+       struct leon_ipi_work *work = &__get_cpu_var(leon_ipi_work);
+
+       if (work->single) {
+               work->single = 0;
+               smp_call_function_single_interrupt();
+       }
+       if (work->msk) {
+               work->msk = 0;
+               smp_call_function_interrupt();
+       }
+       if (work->resched) {
+               work->resched = 0;
+               smp_resched_interrupt();
+       }
+}
+
 static struct smp_funcall {
        smpfunc_t func;
        unsigned long arg1;
@@ -337,10 +440,10 @@ static void leon_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
                {
                        register int i;
 
-                       cpu_clear(smp_processor_id(), mask);
-                       cpus_and(mask, cpu_online_map, mask);
+                       cpumask_clear_cpu(smp_processor_id(), &mask);
+                       cpumask_and(&mask, cpu_online_mask, &mask);
                        for (i = 0; i <= high; i++) {
-                               if (cpu_isset(i, mask)) {
+                               if (cpumask_test_cpu(i, &mask)) {
                                        ccall_info.processors_in[i] = 0;
                                        ccall_info.processors_out[i] = 0;
                                        set_cpu_int(i, LEON3_IRQ_CROSS_CALL);
@@ -354,7 +457,7 @@ static void leon_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
 
                        i = 0;
                        do {
-                               if (!cpu_isset(i, mask))
+                               if (!cpumask_test_cpu(i, &mask))
                                        continue;
 
                                while (!ccall_info.processors_in[i])
@@ -363,7 +466,7 @@ static void leon_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
 
                        i = 0;
                        do {
-                               if (!cpu_isset(i, mask))
+                               if (!cpumask_test_cpu(i, &mask))
                                        continue;
 
                                while (!ccall_info.processors_out[i])
@@ -386,27 +489,23 @@ void leon_cross_call_irq(void)
        ccall_info.processors_out[i] = 1;
 }
 
-void leon_percpu_timer_interrupt(struct pt_regs *regs)
+irqreturn_t leon_percpu_timer_interrupt(int irq, void *unused)
 {
-       struct pt_regs *old_regs;
        int cpu = smp_processor_id();
 
-       old_regs = set_irq_regs(regs);
-
        leon_clear_profile_irq(cpu);
 
        profile_tick(CPU_PROFILING);
 
        if (!--prof_counter(cpu)) {
-               int user = user_mode(regs);
+               int user = user_mode(get_irq_regs());
 
-               irq_enter();
                update_process_times(user);
-               irq_exit();
 
                prof_counter(cpu) = prof_multiplier(cpu);
        }
-       set_irq_regs(old_regs);
+
+       return IRQ_HANDLED;
 }
 
 static void __init smp_setup_percpu_timer(void)
@@ -449,6 +548,9 @@ void __init leon_init_smp(void)
        BTFIXUPSET_CALL(smp_cross_call, leon_cross_call, BTFIXUPCALL_NORM);
        BTFIXUPSET_CALL(__hard_smp_processor_id, __leon_processor_id,
                        BTFIXUPCALL_NORM);
+       BTFIXUPSET_CALL(smp_ipi_resched, leon_ipi_resched, BTFIXUPCALL_NORM);
+       BTFIXUPSET_CALL(smp_ipi_single, leon_ipi_single, BTFIXUPCALL_NORM);
+       BTFIXUPSET_CALL(smp_ipi_mask_one, leon_ipi_mask_one, BTFIXUPCALL_NORM);
 }
 
 #endif /* CONFIG_SPARC_LEON */
index 56db064..42f28c7 100644 (file)
@@ -768,7 +768,7 @@ static void * __cpuinit mdesc_iterate_over_cpus(void *(*func)(struct mdesc_handl
                               cpuid, NR_CPUS);
                        continue;
                }
-               if (!cpu_isset(cpuid, *mask))
+               if (!cpumask_test_cpu(cpuid, mask))
                        continue;
 #endif
 
index 5c14968..3bb2eac 100644 (file)
@@ -622,8 +622,9 @@ static unsigned int __init build_one_device_irq(struct platform_device *op,
 out:
        nid = of_node_to_nid(dp);
        if (nid != -1) {
-               cpumask_t numa_mask = *cpumask_of_node(nid);
+               cpumask_t numa_mask;
 
+               cpumask_copy(&numa_mask, cpumask_of_node(nid));
                irq_set_affinity(irq, &numa_mask);
        }
 
index 30982e9..580651a 100644 (file)
@@ -284,8 +284,9 @@ static int bringup_one_msi_queue(struct pci_pbm_info *pbm,
 
        nid = pbm->numa_node;
        if (nid != -1) {
-               cpumask_t numa_mask = *cpumask_of_node(nid);
+               cpumask_t numa_mask;
 
+               cpumask_copy(&numa_mask, cpumask_of_node(nid));
                irq_set_affinity(irq, &numa_mask);
        }
        err = request_irq(irq, sparc64_msiq_interrupt, 0,
index 2cdc131..948601a 100644 (file)
@@ -164,6 +164,9 @@ void __iomem *pcic_regs;
 volatile int pcic_speculative;
 volatile int pcic_trapped;
 
+/* forward */
+unsigned int pcic_build_device_irq(struct platform_device *op,
+                                   unsigned int real_irq);
 
 #define CONFIG_CMD(bus, device_fn, where) (0x80000000 | (((unsigned int)bus) << 16) | (((unsigned int)device_fn) << 8) | (where & ~3))
 
@@ -523,6 +526,7 @@ static void
 pcic_fill_irq(struct linux_pcic *pcic, struct pci_dev *dev, int node)
 {
        struct pcic_ca2irq *p;
+       unsigned int real_irq;
        int i, ivec;
        char namebuf[64];
 
@@ -551,26 +555,25 @@ pcic_fill_irq(struct linux_pcic *pcic, struct pci_dev *dev, int node)
        i = p->pin;
        if (i >= 0 && i < 4) {
                ivec = readw(pcic->pcic_regs+PCI_INT_SELECT_LO);
-               dev->irq = ivec >> (i << 2) & 0xF;
+               real_irq = ivec >> (i << 2) & 0xF;
        } else if (i >= 4 && i < 8) {
                ivec = readw(pcic->pcic_regs+PCI_INT_SELECT_HI);
-               dev->irq = ivec >> ((i-4) << 2) & 0xF;
+               real_irq = ivec >> ((i-4) << 2) & 0xF;
        } else {                                        /* Corrupted map */
                printk("PCIC: BAD PIN %d\n", i); for (;;) {}
        }
 /* P3 */ /* printk("PCIC: device %s pin %d ivec 0x%x irq %x\n", namebuf, i, ivec, dev->irq); */
 
-       /*
-        * dev->irq=0 means PROM did not bother to program the upper
+       /* real_irq == 0 means PROM did not bother to program the upper
         * half of PCIC. This happens on JS-E with PROM 3.11, for instance.
         */
-       if (dev->irq == 0 || p->force) {
+       if (real_irq == 0 || p->force) {
                if (p->irq == 0 || p->irq >= 15) {      /* Corrupted map */
                        printk("PCIC: BAD IRQ %d\n", p->irq); for (;;) {}
                }
                printk("PCIC: setting irq %d at pin %d for device %02x:%02x\n",
                    p->irq, p->pin, dev->bus->number, dev->devfn);
-               dev->irq = p->irq;
+               real_irq = p->irq;
 
                i = p->pin;
                if (i >= 4) {
@@ -584,7 +587,8 @@ pcic_fill_irq(struct linux_pcic *pcic, struct pci_dev *dev, int node)
                        ivec |= p->irq << (i << 2);
                        writew(ivec, pcic->pcic_regs+PCI_INT_SELECT_LO);
                }
-       }
+       }
+       dev->irq = pcic_build_device_irq(NULL, real_irq);
 }
 
 /*
@@ -729,6 +733,7 @@ void __init pci_time_init(void)
        struct linux_pcic *pcic = &pcic0;
        unsigned long v;
        int timer_irq, irq;
+       int err;
 
        do_arch_gettimeoffset = pci_gettimeoffset;
 
@@ -740,9 +745,10 @@ void __init pci_time_init(void)
        timer_irq = PCI_COUNTER_IRQ_SYS(v);
        writel (PCI_COUNTER_IRQ_SET(timer_irq, 0),
                pcic->pcic_regs+PCI_COUNTER_IRQ);
-       irq = request_irq(timer_irq, pcic_timer_handler,
-                         (IRQF_DISABLED | SA_STATIC_ALLOC), "timer", NULL);
-       if (irq) {
+       irq = pcic_build_device_irq(NULL, timer_irq);
+       err = request_irq(irq, pcic_timer_handler,
+                         IRQF_TIMER, "timer", NULL);
+       if (err) {
                prom_printf("time_init: unable to attach IRQ%d\n", timer_irq);
                prom_halt();
        }
@@ -803,50 +809,73 @@ static inline unsigned long get_irqmask(int irq_nr)
        return 1 << irq_nr;
 }
 
-static void pcic_disable_irq(unsigned int irq_nr)
+static void pcic_mask_irq(struct irq_data *data)
 {
        unsigned long mask, flags;
 
-       mask = get_irqmask(irq_nr);
+       mask = (unsigned long)data->chip_data;
        local_irq_save(flags);
        writel(mask, pcic0.pcic_regs+PCI_SYS_INT_TARGET_MASK_SET);
        local_irq_restore(flags);
 }
 
-static void pcic_enable_irq(unsigned int irq_nr)
+static void pcic_unmask_irq(struct irq_data *data)
 {
        unsigned long mask, flags;
 
-       mask = get_irqmask(irq_nr);
+       mask = (unsigned long)data->chip_data;
        local_irq_save(flags);
        writel(mask, pcic0.pcic_regs+PCI_SYS_INT_TARGET_MASK_CLEAR);
        local_irq_restore(flags);
 }
 
-static void pcic_load_profile_irq(int cpu, unsigned int limit)
+static unsigned int pcic_startup_irq(struct irq_data *data)
 {
-       printk("PCIC: unimplemented code: FILE=%s LINE=%d", __FILE__, __LINE__);
+       irq_link(data->irq);
+       pcic_unmask_irq(data);
+       return 0;
 }
 
-/* We assume the caller has disabled local interrupts when these are called,
- * or else very bizarre behavior will result.
- */
-static void pcic_disable_pil_irq(unsigned int pil)
+static struct irq_chip pcic_irq = {
+       .name           = "pcic",
+       .irq_startup    = pcic_startup_irq,
+       .irq_mask       = pcic_mask_irq,
+       .irq_unmask     = pcic_unmask_irq,
+};
+
+unsigned int pcic_build_device_irq(struct platform_device *op,
+                                   unsigned int real_irq)
 {
-       writel(get_irqmask(pil), pcic0.pcic_regs+PCI_SYS_INT_TARGET_MASK_SET);
+       unsigned int irq;
+       unsigned long mask;
+
+       irq = 0;
+       mask = get_irqmask(real_irq);
+       if (mask == 0)
+               goto out;
+
+       irq = irq_alloc(real_irq, real_irq);
+       if (irq == 0)
+               goto out;
+
+       irq_set_chip_and_handler_name(irq, &pcic_irq,
+                                     handle_level_irq, "PCIC");
+       irq_set_chip_data(irq, (void *)mask);
+
+out:
+       return irq;
 }
 
-static void pcic_enable_pil_irq(unsigned int pil)
+
+static void pcic_load_profile_irq(int cpu, unsigned int limit)
 {
-       writel(get_irqmask(pil), pcic0.pcic_regs+PCI_SYS_INT_TARGET_MASK_CLEAR);
+       printk("PCIC: unimplemented code: FILE=%s LINE=%d", __FILE__, __LINE__);
 }
 
 void __init sun4m_pci_init_IRQ(void)
 {
-       BTFIXUPSET_CALL(enable_irq, pcic_enable_irq, BTFIXUPCALL_NORM);
-       BTFIXUPSET_CALL(disable_irq, pcic_disable_irq, BTFIXUPCALL_NORM);
-       BTFIXUPSET_CALL(enable_pil_irq, pcic_enable_pil_irq, BTFIXUPCALL_NORM);
-       BTFIXUPSET_CALL(disable_pil_irq, pcic_disable_pil_irq, BTFIXUPCALL_NORM);
+       sparc_irq_config.build_device_irq = pcic_build_device_irq;
+
        BTFIXUPSET_CALL(clear_clock_irq, pcic_clear_clock_irq, BTFIXUPCALL_NORM);
        BTFIXUPSET_CALL(load_profile_irq, pcic_load_profile_irq, BTFIXUPCALL_NORM);
 }
index ee8426e..2cb0e1c 100644 (file)
@@ -26,6 +26,7 @@
 #include <asm/nmi.h>
 #include <asm/pcr.h>
 
+#include "kernel.h"
 #include "kstack.h"
 
 /* Sparc64 chips have two performance counters, 32-bits each, with
index 1752929..c8cc461 100644 (file)
@@ -128,8 +128,16 @@ void cpu_idle(void)
         set_thread_flag(TIF_POLLING_NRFLAG);
        /* endless idle loop with no priority at all */
        while(1) {
-               while (!need_resched())
-                       cpu_relax();
+#ifdef CONFIG_SPARC_LEON
+               if (pm_idle) {
+                       while (!need_resched())
+                               (*pm_idle)();
+               } else
+#endif
+               {
+                       while (!need_resched())
+                               cpu_relax();
+               }
                preempt_enable_no_resched();
                schedule();
                preempt_disable();
index 05fb253..5ce3d15 100644 (file)
@@ -326,7 +326,6 @@ void __init of_console_init(void)
                        of_console_options = NULL;
        }
 
-       prom_printf(msg, of_console_path);
        printk(msg, of_console_path);
 }
 
index 7b8b76c..3609bde 100644 (file)
@@ -103,16 +103,20 @@ static unsigned int boot_flags __initdata = 0;
 /* Exported for mm/init.c:paging_init. */
 unsigned long cmdline_memory_size __initdata = 0;
 
+/* which CPU booted us (0xff = not set) */
+unsigned char boot_cpu_id = 0xff; /* 0xff will make it into DATA section... */
+unsigned char boot_cpu_id4; /* boot_cpu_id << 2 */
+
 static void
 prom_console_write(struct console *con, const char *s, unsigned n)
 {
        prom_write(s, n);
 }
 
-static struct console prom_debug_console = {
-       .name =         "debug",
+static struct console prom_early_console = {
+       .name =         "earlyprom",
        .write =        prom_console_write,
-       .flags =        CON_PRINTBUFFER,
+       .flags =        CON_PRINTBUFFER | CON_BOOT,
        .index =        -1,
 };
 
@@ -133,8 +137,7 @@ static void __init process_switch(char c)
                prom_halt();
                break;
        case 'p':
-               /* Use PROM debug console. */
-               register_console(&prom_debug_console);
+               /* Just ignore, this behavior is now the default.  */
                break;
        default:
                printk("Unknown boot switch (-%c)\n", c);
@@ -215,6 +218,10 @@ void __init setup_arch(char **cmdline_p)
        strcpy(boot_command_line, *cmdline_p);
        parse_early_param();
 
+       boot_flags_init(*cmdline_p);
+
+       register_console(&prom_early_console);
+
        /* Set sparc_cpu_model */
        sparc_cpu_model = sun_unknown;
        if (!strcmp(&cputypval[0], "sun4 "))
@@ -265,7 +272,6 @@ void __init setup_arch(char **cmdline_p)
 #ifdef CONFIG_DUMMY_CONSOLE
        conswitchp = &dummy_con;
 #endif
-       boot_flags_init(*cmdline_p);
 
        idprom_init();
        if (ARCH_SUN4C)
@@ -311,75 +317,6 @@ void __init setup_arch(char **cmdline_p)
        smp_setup_cpu_possible_map();
 }
 
-static int ncpus_probed;
-
-static int show_cpuinfo(struct seq_file *m, void *__unused)
-{
-       seq_printf(m,
-                  "cpu\t\t: %s\n"
-                  "fpu\t\t: %s\n"
-                  "promlib\t\t: Version %d Revision %d\n"
-                  "prom\t\t: %d.%d\n"
-                  "type\t\t: %s\n"
-                  "ncpus probed\t: %d\n"
-                  "ncpus active\t: %d\n"
-#ifndef CONFIG_SMP
-                  "CPU0Bogo\t: %lu.%02lu\n"
-                  "CPU0ClkTck\t: %ld\n"
-#endif
-                  ,
-                  sparc_cpu_type,
-                  sparc_fpu_type ,
-                  romvec->pv_romvers,
-                  prom_rev,
-                  romvec->pv_printrev >> 16,
-                  romvec->pv_printrev & 0xffff,
-                  &cputypval[0],
-                  ncpus_probed,
-                  num_online_cpus()
-#ifndef CONFIG_SMP
-                  , cpu_data(0).udelay_val/(500000/HZ),
-                  (cpu_data(0).udelay_val/(5000/HZ)) % 100,
-                  cpu_data(0).clock_tick
-#endif
-               );
-
-#ifdef CONFIG_SMP
-       smp_bogo(m);
-#endif
-       mmu_info(m);
-#ifdef CONFIG_SMP
-       smp_info(m);
-#endif
-       return 0;
-}
-
-static void *c_start(struct seq_file *m, loff_t *pos)
-{
-       /* The pointer we are returning is arbitrary,
-        * it just has to be non-NULL and not IS_ERR
-        * in the success case.
-        */
-       return *pos == 0 ? &c_start : NULL;
-}
-
-static void *c_next(struct seq_file *m, void *v, loff_t *pos)
-{
-       ++*pos;
-       return c_start(m, pos);
-}
-
-static void c_stop(struct seq_file *m, void *v)
-{
-}
-
-const struct seq_operations cpuinfo_op = {
-       .start =c_start,
-       .next = c_next,
-       .stop = c_stop,
-       .show = show_cpuinfo,
-};
-
 extern int stop_a_enabled;
 
 void sun_do_break(void)
index 29bafe0..f3b6850 100644 (file)
@@ -339,84 +339,6 @@ void __init setup_arch(char **cmdline_p)
        paging_init();
 }
 
-/* BUFFER is PAGE_SIZE bytes long. */
-
-extern void smp_info(struct seq_file *);
-extern void smp_bogo(struct seq_file *);
-extern void mmu_info(struct seq_file *);
-
-unsigned int dcache_parity_tl1_occurred;
-unsigned int icache_parity_tl1_occurred;
-
-int ncpus_probed;
-
-static int show_cpuinfo(struct seq_file *m, void *__unused)
-{
-       seq_printf(m, 
-                  "cpu\t\t: %s\n"
-                  "fpu\t\t: %s\n"
-                  "pmu\t\t: %s\n"
-                  "prom\t\t: %s\n"
-                  "type\t\t: %s\n"
-                  "ncpus probed\t: %d\n"
-                  "ncpus active\t: %d\n"
-                  "D$ parity tl1\t: %u\n"
-                  "I$ parity tl1\t: %u\n"
-#ifndef CONFIG_SMP
-                  "Cpu0ClkTck\t: %016lx\n"
-#endif
-                  ,
-                  sparc_cpu_type,
-                  sparc_fpu_type,
-                  sparc_pmu_type,
-                  prom_version,
-                  ((tlb_type == hypervisor) ?
-                   "sun4v" :
-                   "sun4u"),
-                  ncpus_probed,
-                  num_online_cpus(),
-                  dcache_parity_tl1_occurred,
-                  icache_parity_tl1_occurred
-#ifndef CONFIG_SMP
-                  , cpu_data(0).clock_tick
-#endif
-               );
-#ifdef CONFIG_SMP
-       smp_bogo(m);
-#endif
-       mmu_info(m);
-#ifdef CONFIG_SMP
-       smp_info(m);
-#endif
-       return 0;
-}
-
-static void *c_start(struct seq_file *m, loff_t *pos)
-{
-       /* The pointer we are returning is arbitrary,
-        * it just has to be non-NULL and not IS_ERR
-        * in the success case.
-        */
-       return *pos == 0 ? &c_start : NULL;
-}
-
-static void *c_next(struct seq_file *m, void *v, loff_t *pos)
-{
-       ++*pos;
-       return c_start(m, pos);
-}
-
-static void c_stop(struct seq_file *m, void *v)
-{
-}
-
-const struct seq_operations cpuinfo_op = {
-       .start =c_start,
-       .next = c_next,
-       .stop = c_stop,
-       .show = show_cpuinfo,
-};
-
 extern int stop_a_enabled;
 
 void sun_do_break(void)
index 442286d..d5b3958 100644 (file)
@@ -37,8 +37,6 @@
 #include "irq.h"
 
 volatile unsigned long cpu_callin_map[NR_CPUS] __cpuinitdata = {0,};
-unsigned char boot_cpu_id = 0;
-unsigned char boot_cpu_id4 = 0; /* boot_cpu_id << 2 */
 
 cpumask_t smp_commenced_mask = CPU_MASK_NONE;
 
@@ -130,14 +128,57 @@ struct linux_prom_registers smp_penguin_ctable __cpuinitdata = { 0 };
 void smp_send_reschedule(int cpu)
 {
        /*
-        * XXX missing reschedule IPI, see scheduler_ipi()
+        * CPU model dependent way of implementing IPI generation targeting
+        * a single CPU. The trap handler needs only to do trap entry/return
+        * to call schedule.
         */
+       BTFIXUP_CALL(smp_ipi_resched)(cpu);
 }
 
 void smp_send_stop(void)
 {
 }
 
+void arch_send_call_function_single_ipi(int cpu)
+{
+       /* trigger one IPI single call on one CPU */
+       BTFIXUP_CALL(smp_ipi_single)(cpu);
+}
+
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
+{
+       int cpu;
+
+       /* trigger IPI mask call on each CPU */
+       for_each_cpu(cpu, mask)
+               BTFIXUP_CALL(smp_ipi_mask_one)(cpu);
+}
+
+void smp_resched_interrupt(void)
+{
+       irq_enter();
+       scheduler_ipi();
+       local_cpu_data().irq_resched_count++;
+       irq_exit();
+       /* re-schedule routine called by interrupt return code. */
+}
+
+void smp_call_function_single_interrupt(void)
+{
+       irq_enter();
+       generic_smp_call_function_single_interrupt();
+       local_cpu_data().irq_call_count++;
+       irq_exit();
+}
+
+void smp_call_function_interrupt(void)
+{
+       irq_enter();
+       generic_smp_call_function_interrupt();
+       local_cpu_data().irq_call_count++;
+       irq_exit();
+}
+
 void smp_flush_cache_all(void)
 {
        xc0((smpfunc_t) BTFIXUP_CALL(local_flush_cache_all));
@@ -153,9 +194,10 @@ void smp_flush_tlb_all(void)
 void smp_flush_cache_mm(struct mm_struct *mm)
 {
        if(mm->context != NO_CONTEXT) {
-               cpumask_t cpu_mask = *mm_cpumask(mm);
-               cpu_clear(smp_processor_id(), cpu_mask);
-               if (!cpus_empty(cpu_mask))
+               cpumask_t cpu_mask;
+               cpumask_copy(&cpu_mask, mm_cpumask(mm));
+               cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
+               if (!cpumask_empty(&cpu_mask))
                        xc1((smpfunc_t) BTFIXUP_CALL(local_flush_cache_mm), (unsigned long) mm);
                local_flush_cache_mm(mm);
        }
@@ -164,9 +206,10 @@ void smp_flush_cache_mm(struct mm_struct *mm)
 void smp_flush_tlb_mm(struct mm_struct *mm)
 {
        if(mm->context != NO_CONTEXT) {
-               cpumask_t cpu_mask = *mm_cpumask(mm);
-               cpu_clear(smp_processor_id(), cpu_mask);
-               if (!cpus_empty(cpu_mask)) {
+               cpumask_t cpu_mask;
+               cpumask_copy(&cpu_mask, mm_cpumask(mm));
+               cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
+               if (!cpumask_empty(&cpu_mask)) {
                        xc1((smpfunc_t) BTFIXUP_CALL(local_flush_tlb_mm), (unsigned long) mm);
                        if(atomic_read(&mm->mm_users) == 1 && current->active_mm == mm)
                                cpumask_copy(mm_cpumask(mm),
@@ -182,9 +225,10 @@ void smp_flush_cache_range(struct vm_area_struct *vma, unsigned long start,
        struct mm_struct *mm = vma->vm_mm;
 
        if (mm->context != NO_CONTEXT) {
-               cpumask_t cpu_mask = *mm_cpumask(mm);
-               cpu_clear(smp_processor_id(), cpu_mask);
-               if (!cpus_empty(cpu_mask))
+               cpumask_t cpu_mask;
+               cpumask_copy(&cpu_mask, mm_cpumask(mm));
+               cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
+               if (!cpumask_empty(&cpu_mask))
                        xc3((smpfunc_t) BTFIXUP_CALL(local_flush_cache_range), (unsigned long) vma, start, end);
                local_flush_cache_range(vma, start, end);
        }
@@ -196,9 +240,10 @@ void smp_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
        struct mm_struct *mm = vma->vm_mm;
 
        if (mm->context != NO_CONTEXT) {
-               cpumask_t cpu_mask = *mm_cpumask(mm);
-               cpu_clear(smp_processor_id(), cpu_mask);
-               if (!cpus_empty(cpu_mask))
+               cpumask_t cpu_mask;
+               cpumask_copy(&cpu_mask, mm_cpumask(mm));
+               cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
+               if (!cpumask_empty(&cpu_mask))
                        xc3((smpfunc_t) BTFIXUP_CALL(local_flush_tlb_range), (unsigned long) vma, start, end);
                local_flush_tlb_range(vma, start, end);
        }
@@ -209,9 +254,10 @@ void smp_flush_cache_page(struct vm_area_struct *vma, unsigned long page)
        struct mm_struct *mm = vma->vm_mm;
 
        if(mm->context != NO_CONTEXT) {
-               cpumask_t cpu_mask = *mm_cpumask(mm);
-               cpu_clear(smp_processor_id(), cpu_mask);
-               if (!cpus_empty(cpu_mask))
+               cpumask_t cpu_mask;
+               cpumask_copy(&cpu_mask, mm_cpumask(mm));
+               cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
+               if (!cpumask_empty(&cpu_mask))
                        xc2((smpfunc_t) BTFIXUP_CALL(local_flush_cache_page), (unsigned long) vma, page);
                local_flush_cache_page(vma, page);
        }
@@ -222,19 +268,15 @@ void smp_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
        struct mm_struct *mm = vma->vm_mm;
 
        if(mm->context != NO_CONTEXT) {
-               cpumask_t cpu_mask = *mm_cpumask(mm);
-               cpu_clear(smp_processor_id(), cpu_mask);
-               if (!cpus_empty(cpu_mask))
+               cpumask_t cpu_mask;
+               cpumask_copy(&cpu_mask, mm_cpumask(mm));
+               cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
+               if (!cpumask_empty(&cpu_mask))
                        xc2((smpfunc_t) BTFIXUP_CALL(local_flush_tlb_page), (unsigned long) vma, page);
                local_flush_tlb_page(vma, page);
        }
 }
 
-void smp_reschedule_irq(void)
-{
-       set_need_resched();
-}
-
 void smp_flush_page_to_ram(unsigned long page)
 {
        /* Current theory is that those who call this are the one's
@@ -251,9 +293,10 @@ void smp_flush_page_to_ram(unsigned long page)
 
 void smp_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr)
 {
-       cpumask_t cpu_mask = *mm_cpumask(mm);
-       cpu_clear(smp_processor_id(), cpu_mask);
-       if (!cpus_empty(cpu_mask))
+       cpumask_t cpu_mask;
+       cpumask_copy(&cpu_mask, mm_cpumask(mm));
+       cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
+       if (!cpumask_empty(&cpu_mask))
                xc2((smpfunc_t) BTFIXUP_CALL(local_flush_sig_insns), (unsigned long) mm, insn_addr);
        local_flush_sig_insns(mm, insn_addr);
 }
@@ -407,7 +450,7 @@ int __cpuinit __cpu_up(unsigned int cpu)
        };
 
        if (!ret) {
-               cpu_set(cpu, smp_commenced_mask);
+               cpumask_set_cpu(cpu, &smp_commenced_mask);
                while (!cpu_online(cpu))
                        mb();
        }
index 9478da7..99cb172 100644 (file)
@@ -121,11 +121,11 @@ void __cpuinit smp_callin(void)
        /* inform the notifiers about the new cpu */
        notify_cpu_starting(cpuid);
 
-       while (!cpu_isset(cpuid, smp_commenced_mask))
+       while (!cpumask_test_cpu(cpuid, &smp_commenced_mask))
                rmb();
 
        ipi_call_lock_irq();
-       cpu_set(cpuid, cpu_online_map);
+       set_cpu_online(cpuid, true);
        ipi_call_unlock_irq();
 
        /* idle thread is expected to have preempt disabled */
@@ -785,7 +785,7 @@ static void xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask
 
 /* Send cross call to all processors mentioned in MASK_P
  * except self.  Really, there are only two cases currently,
- * "&cpu_online_map" and "&mm->cpu_vm_mask".
+ * "cpu_online_mask" and "mm_cpumask(mm)".
  */
 static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 data2, const cpumask_t *mask)
 {
@@ -797,7 +797,7 @@ static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 d
 /* Send cross call to all processors except self. */
 static void smp_cross_call(unsigned long *func, u32 ctx, u64 data1, u64 data2)
 {
-       smp_cross_call_masked(func, ctx, data1, data2, &cpu_online_map);
+       smp_cross_call_masked(func, ctx, data1, data2, cpu_online_mask);
 }
 
 extern unsigned long xcall_sync_tick;
@@ -805,7 +805,7 @@ extern unsigned long xcall_sync_tick;
 static void smp_start_sync_tick_client(int cpu)
 {
        xcall_deliver((u64) &xcall_sync_tick, 0, 0,
-                     &cpumask_of_cpu(cpu));
+                     cpumask_of(cpu));
 }
 
 extern unsigned long xcall_call_function;
@@ -820,7 +820,7 @@ extern unsigned long xcall_call_function_single;
 void arch_send_call_function_single_ipi(int cpu)
 {
        xcall_deliver((u64) &xcall_call_function_single, 0, 0,
-                     &cpumask_of_cpu(cpu));
+                     cpumask_of(cpu));
 }
 
 void __irq_entry smp_call_function_client(int irq, struct pt_regs *regs)
@@ -918,7 +918,7 @@ void smp_flush_dcache_page_impl(struct page *page, int cpu)
                }
                if (data0) {
                        xcall_deliver(data0, __pa(pg_addr),
-                                     (u64) pg_addr, &cpumask_of_cpu(cpu));
+                                     (u64) pg_addr, cpumask_of(cpu));
 #ifdef CONFIG_DEBUG_DCFLUSH
                        atomic_inc(&dcpage_flushes_xcall);
 #endif
@@ -954,7 +954,7 @@ void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
        }
        if (data0) {
                xcall_deliver(data0, __pa(pg_addr),
-                             (u64) pg_addr, &cpu_online_map);
+                             (u64) pg_addr, cpu_online_mask);
 #ifdef CONFIG_DEBUG_DCFLUSH
                atomic_inc(&dcpage_flushes_xcall);
 #endif
@@ -1197,32 +1197,32 @@ void __devinit smp_fill_in_sib_core_maps(void)
        for_each_present_cpu(i) {
                unsigned int j;
 
-               cpus_clear(cpu_core_map[i]);
+               cpumask_clear(&cpu_core_map[i]);
                if (cpu_data(i).core_id == 0) {
-                       cpu_set(i, cpu_core_map[i]);
+                       cpumask_set_cpu(i, &cpu_core_map[i]);
                        continue;
                }
 
                for_each_present_cpu(j) {
                        if (cpu_data(i).core_id ==
                            cpu_data(j).core_id)
-                               cpu_set(j, cpu_core_map[i]);
+                               cpumask_set_cpu(j, &cpu_core_map[i]);
                }
        }
 
        for_each_present_cpu(i) {
                unsigned int j;
 
-               cpus_clear(per_cpu(cpu_sibling_map, i));
+               cpumask_clear(&per_cpu(cpu_sibling_map, i));
                if (cpu_data(i).proc_id == -1) {
-                       cpu_set(i, per_cpu(cpu_sibling_map, i));
+                       cpumask_set_cpu(i, &per_cpu(cpu_sibling_map, i));
                        continue;
                }
 
                for_each_present_cpu(j) {
                        if (cpu_data(i).proc_id ==
                            cpu_data(j).proc_id)
-                               cpu_set(j, per_cpu(cpu_sibling_map, i));
+                               cpumask_set_cpu(j, &per_cpu(cpu_sibling_map, i));
                }
        }
 }
@@ -1232,10 +1232,10 @@ int __cpuinit __cpu_up(unsigned int cpu)
        int ret = smp_boot_one_cpu(cpu);
 
        if (!ret) {
-               cpu_set(cpu, smp_commenced_mask);
-               while (!cpu_isset(cpu, cpu_online_map))
+               cpumask_set_cpu(cpu, &smp_commenced_mask);
+               while (!cpu_online(cpu))
                        mb();
-               if (!cpu_isset(cpu, cpu_online_map)) {
+               if (!cpu_online(cpu)) {
                        ret = -ENODEV;
                } else {
                        /* On SUN4V, writes to %tick and %stick are
@@ -1269,7 +1269,7 @@ void cpu_play_dead(void)
                                tb->nonresum_mondo_pa, 0);
        }
 
-       cpu_clear(cpu, smp_commenced_mask);
+       cpumask_clear_cpu(cpu, &smp_commenced_mask);
        membar_safe("#Sync");
 
        local_irq_disable();
@@ -1290,13 +1290,13 @@ int __cpu_disable(void)
        cpuinfo_sparc *c;
        int i;
 
-       for_each_cpu_mask(i, cpu_core_map[cpu])
-               cpu_clear(cpu, cpu_core_map[i]);
-       cpus_clear(cpu_core_map[cpu]);
+       for_each_cpu(i, &cpu_core_map[cpu])
+               cpumask_clear_cpu(cpu, &cpu_core_map[i]);
+       cpumask_clear(&cpu_core_map[cpu]);
 
-       for_each_cpu_mask(i, per_cpu(cpu_sibling_map, cpu))
-               cpu_clear(cpu, per_cpu(cpu_sibling_map, i));
-       cpus_clear(per_cpu(cpu_sibling_map, cpu));
+       for_each_cpu(i, &per_cpu(cpu_sibling_map, cpu))
+               cpumask_clear_cpu(cpu, &per_cpu(cpu_sibling_map, i));
+       cpumask_clear(&per_cpu(cpu_sibling_map, cpu));
 
        c = &cpu_data(cpu);
 
@@ -1313,7 +1313,7 @@ int __cpu_disable(void)
        local_irq_disable();
 
        ipi_call_lock();
-       cpu_clear(cpu, cpu_online_map);
+       set_cpu_online(cpu, false);
        ipi_call_unlock();
 
        cpu_map_rebuild();
@@ -1327,11 +1327,11 @@ void __cpu_die(unsigned int cpu)
 
        for (i = 0; i < 100; i++) {
                smp_rmb();
-               if (!cpu_isset(cpu, smp_commenced_mask))
+               if (!cpumask_test_cpu(cpu, &smp_commenced_mask))
                        break;
                msleep(100);
        }
-       if (cpu_isset(cpu, smp_commenced_mask)) {
+       if (cpumask_test_cpu(cpu, &smp_commenced_mask)) {
                printk(KERN_ERR "CPU %u didn't die...\n", cpu);
        } else {
 #if defined(CONFIG_SUN_LDOMS)
@@ -1341,7 +1341,7 @@ void __cpu_die(unsigned int cpu)
                do {
                        hv_err = sun4v_cpu_stop(cpu);
                        if (hv_err == HV_EOK) {
-                               cpu_clear(cpu, cpu_present_map);
+                               set_cpu_present(cpu, false);
                                break;
                        }
                } while (--limit > 0);
@@ -1362,7 +1362,7 @@ void __init smp_cpus_done(unsigned int max_cpus)
 void smp_send_reschedule(int cpu)
 {
        xcall_deliver((u64) &xcall_receive_signal, 0, 0,
-                     &cpumask_of_cpu(cpu));
+                     cpumask_of(cpu));
 }
 
 void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs)
index 90eea38..f6bf25a 100644 (file)
  */
 unsigned char __iomem *interrupt_enable;
 
-static void sun4c_disable_irq(unsigned int irq_nr)
+static void sun4c_mask_irq(struct irq_data *data)
 {
-       unsigned long flags;
-       unsigned char current_mask, new_mask;
-
-       local_irq_save(flags);
-       irq_nr &= (NR_IRQS - 1);
-       current_mask = sbus_readb(interrupt_enable);
-       switch (irq_nr) {
-       case 1:
-               new_mask = ((current_mask) & (~(SUN4C_INT_E1)));
-               break;
-       case 8:
-               new_mask = ((current_mask) & (~(SUN4C_INT_E8)));
-               break;
-       case 10:
-               new_mask = ((current_mask) & (~(SUN4C_INT_E10)));
-               break;
-       case 14:
-               new_mask = ((current_mask) & (~(SUN4C_INT_E14)));
-               break;
-       default:
+       unsigned long mask = (unsigned long)data->chip_data;
+
+       if (mask) {
+               unsigned long flags;
+
+               local_irq_save(flags);
+               mask = sbus_readb(interrupt_enable) & ~mask;
+               sbus_writeb(mask, interrupt_enable);
                local_irq_restore(flags);
-               return;
        }
-       sbus_writeb(new_mask, interrupt_enable);
-       local_irq_restore(flags);
 }
 
-static void sun4c_enable_irq(unsigned int irq_nr)
+static void sun4c_unmask_irq(struct irq_data *data)
 {
-       unsigned long flags;
-       unsigned char current_mask, new_mask;
-
-       local_irq_save(flags);
-       irq_nr &= (NR_IRQS - 1);
-       current_mask = sbus_readb(interrupt_enable);
-       switch (irq_nr) {
-       case 1:
-               new_mask = ((current_mask) | SUN4C_INT_E1);
-               break;
-       case 8:
-               new_mask = ((current_mask) | SUN4C_INT_E8);
-               break;
-       case 10:
-               new_mask = ((current_mask) | SUN4C_INT_E10);
-               break;
-       case 14:
-               new_mask = ((current_mask) | SUN4C_INT_E14);
-               break;
-       default:
+       unsigned long mask = (unsigned long)data->chip_data;
+
+       if (mask) {
+               unsigned long flags;
+
+               local_irq_save(flags);
+               mask = sbus_readb(interrupt_enable) | mask;
+               sbus_writeb(mask, interrupt_enable);
                local_irq_restore(flags);
-               return;
        }
-       sbus_writeb(new_mask, interrupt_enable);
-       local_irq_restore(flags);
+}
+
+static unsigned int sun4c_startup_irq(struct irq_data *data)
+{
+       irq_link(data->irq);
+       sun4c_unmask_irq(data);
+
+       return 0;
+}
+
+static void sun4c_shutdown_irq(struct irq_data *data)
+{
+       sun4c_mask_irq(data);
+       irq_unlink(data->irq);
+}
+
+static struct irq_chip sun4c_irq = {
+       .name           = "sun4c",
+       .irq_startup    = sun4c_startup_irq,
+       .irq_shutdown   = sun4c_shutdown_irq,
+       .irq_mask       = sun4c_mask_irq,
+       .irq_unmask     = sun4c_unmask_irq,
+};
+
+static unsigned int sun4c_build_device_irq(struct platform_device *op,
+                                          unsigned int real_irq)
+{
+        unsigned int irq;
+
+       if (real_irq >= 16) {
+               prom_printf("Bogus sun4c IRQ %u\n", real_irq);
+               prom_halt();
+       }
+
+       irq = irq_alloc(real_irq, real_irq);
+       if (irq) {
+               unsigned long mask = 0UL;
+
+               switch (real_irq) {
+               case 1:
+                       mask = SUN4C_INT_E1;
+                       break;
+               case 8:
+                       mask = SUN4C_INT_E8;
+                       break;
+               case 10:
+                       mask = SUN4C_INT_E10;
+                       break;
+               case 14:
+                       mask = SUN4C_INT_E14;
+                       break;
+               default:
+                       /* All the rest are either always enabled,
+                        * or are for signalling software interrupts.
+                        */
+                       break;
+               }
+               irq_set_chip_and_handler_name(irq, &sun4c_irq,
+                                             handle_level_irq, "level");
+               irq_set_chip_data(irq, (void *)mask);
+       }
+       return irq;
 }
 
 struct sun4c_timer_info {
@@ -144,8 +176,9 @@ static void sun4c_load_profile_irq(int cpu, unsigned int limit)
 
 static void __init sun4c_init_timers(irq_handler_t counter_fn)
 {
-       const struct linux_prom_irqs *irq;
+       const struct linux_prom_irqs *prom_irqs;
        struct device_node *dp;
+       unsigned int irq;
        const u32 *addr;
        int err;
 
@@ -163,9 +196,9 @@ static void __init sun4c_init_timers(irq_handler_t counter_fn)
 
        sun4c_timers = (void __iomem *) (unsigned long) addr[0];
 
-       irq = of_get_property(dp, "intr", NULL);
+       prom_irqs = of_get_property(dp, "intr", NULL);
        of_node_put(dp);
-       if (!irq) {
+       if (!prom_irqs) {
                prom_printf("sun4c_init_timers: No intr property\n");
                prom_halt();
        }
@@ -178,15 +211,15 @@ static void __init sun4c_init_timers(irq_handler_t counter_fn)
 
        master_l10_counter = &sun4c_timers->l10_count;
 
-       err = request_irq(irq[0].pri, counter_fn,
-                         (IRQF_DISABLED | SA_STATIC_ALLOC),
-                         "timer", NULL);
+       irq = sun4c_build_device_irq(NULL, prom_irqs[0].pri);
+       err = request_irq(irq, counter_fn, IRQF_TIMER, "timer", NULL);
        if (err) {
                prom_printf("sun4c_init_timers: request_irq() fails with %d\n", err);
                prom_halt();
        }
 
-       sun4c_disable_irq(irq[1].pri);
+       /* disable timer interrupt */
+       sun4c_mask_irq(irq_get_irq_data(irq));
 }
 
 #ifdef CONFIG_SMP
@@ -215,14 +248,11 @@ void __init sun4c_init_IRQ(void)
 
        interrupt_enable = (void __iomem *) (unsigned long) addr[0];
 
-       BTFIXUPSET_CALL(enable_irq, sun4c_enable_irq, BTFIXUPCALL_NORM);
-       BTFIXUPSET_CALL(disable_irq, sun4c_disable_irq, BTFIXUPCALL_NORM);
-       BTFIXUPSET_CALL(enable_pil_irq, sun4c_enable_irq, BTFIXUPCALL_NORM);
-       BTFIXUPSET_CALL(disable_pil_irq, sun4c_disable_irq, BTFIXUPCALL_NORM);
        BTFIXUPSET_CALL(clear_clock_irq, sun4c_clear_clock_irq, BTFIXUPCALL_NORM);
        BTFIXUPSET_CALL(load_profile_irq, sun4c_load_profile_irq, BTFIXUPCALL_NOP);
 
-       sparc_irq_config.init_timers = sun4c_init_timers;
+       sparc_irq_config.init_timers      = sun4c_init_timers;
+       sparc_irq_config.build_device_irq = sun4c_build_device_irq;
 
 #ifdef CONFIG_SMP
        BTFIXUPSET_CALL(set_cpu_int, sun4c_nop, BTFIXUPCALL_NOP);
index 77b4a89..a9ea60e 100644 (file)
@@ -14,6 +14,7 @@
 #include <asm/io.h>
 #include <asm/sbi.h>
 #include <asm/cacheflush.h>
+#include <asm/setup.h>
 
 #include "kernel.h"
 #include "irq.h"
  * cpu local.  CPU local interrupts cover the timer interrupts
  * and whatnot, and we encode those as normal PILs between
  * 0 and 15.
- *
- * SBUS interrupts are encoded integers including the board number
- * (plus one), the SBUS level, and the SBUS slot number.  Sun4D
- * IRQ dispatch is done by:
- *
- * 1) Reading the BW local interrupt table in order to get the bus
- *    interrupt mask.
- *
- *    This table is indexed by SBUS interrupt level which can be
- *    derived from the PIL we got interrupted on.
- *
- * 2) For each bus showing interrupt pending from #1, read the
- *    SBI interrupt state register.  This will indicate which slots
- *    have interrupts pending for that SBUS interrupt level.
+ * SBUS interrupts are encoded as a combination of board, level and slot.
  */
 
+struct sun4d_handler_data {
+       unsigned int cpuid;    /* target cpu */
+       unsigned int real_irq; /* interrupt level */
+};
+
+
+static unsigned int sun4d_encode_irq(int board, int lvl, int slot)
+{
+       return (board + 1) << 5 | (lvl << 2) | slot;
+}
+
 struct sun4d_timer_regs {
        u32     l10_timer_limit;
        u32     l10_cur_countx;
@@ -48,17 +47,12 @@ struct sun4d_timer_regs {
 
 static struct sun4d_timer_regs __iomem *sun4d_timers;
 
-#define TIMER_IRQ      10
-
-#define MAX_STATIC_ALLOC       4
-static unsigned char sbus_tid[32];
-
-static struct irqaction *irq_action[NR_IRQS];
+#define SUN4D_TIMER_IRQ        10
 
-static struct sbus_action {
-       struct irqaction *action;
-       /* For SMP this needs to be extended */
-} *sbus_actions;
+/* Specify which cpu handle interrupts from which board.
+ * Index is board - value is cpu.
+ */
+static unsigned char board_to_cpu[32];
 
 static int pil_to_sbus[] = {
        0,
@@ -79,152 +73,81 @@ static int pil_to_sbus[] = {
        0,
 };
 
-static int sbus_to_pil[] = {
-       0,
-       2,
-       3,
-       5,
-       7,
-       9,
-       11,
-       13,
-};
-
-static int nsbi;
-
 /* Exported for sun4d_smp.c */
 DEFINE_SPINLOCK(sun4d_imsk_lock);
 
-int show_sun4d_interrupts(struct seq_file *p, void *v)
+/* SBUS interrupts are encoded integers including the board number
+ * (plus one), the SBUS level, and the SBUS slot number.  Sun4D
+ * IRQ dispatch is done by:
+ *
+ * 1) Reading the BW local interrupt table in order to get the bus
+ *    interrupt mask.
+ *
+ *    This table is indexed by SBUS interrupt level which can be
+ *    derived from the PIL we got interrupted on.
+ *
+ * 2) For each bus showing interrupt pending from #1, read the
+ *    SBI interrupt state register.  This will indicate which slots
+ *    have interrupts pending for that SBUS interrupt level.
+ *
+ * 3) Call the generic IRQ support.
+ */
+static void sun4d_sbus_handler_irq(int sbusl)
 {
-       int i = *(loff_t *) v, j = 0, k = 0, sbusl;
-       struct irqaction *action;
-       unsigned long flags;
-#ifdef CONFIG_SMP
-       int x;
-#endif
-
-       spin_lock_irqsave(&irq_action_lock, flags);
-       if (i < NR_IRQS) {
-               sbusl = pil_to_sbus[i];
-               if (!sbusl) {
-                       action = *(i + irq_action);
-                       if (!action)
-                               goto out_unlock;
-               } else {
-                       for (j = 0; j < nsbi; j++) {
-                               for (k = 0; k < 4; k++)
-                                       action = sbus_actions[(j << 5) + (sbusl << 2) + k].action;
-                                       if (action)
-                                               goto found_it;
-                       }
-                       goto out_unlock;
-               }
-found_it:      seq_printf(p, "%3d: ", i);
-#ifndef CONFIG_SMP
-               seq_printf(p, "%10u ", kstat_irqs(i));
-#else
-               for_each_online_cpu(x)
-                       seq_printf(p, "%10u ",
-                              kstat_cpu(cpu_logical_map(x)).irqs[i]);
-#endif
-               seq_printf(p, "%c %s",
-                       (action->flags & IRQF_DISABLED) ? '+' : ' ',
-                       action->name);
-               action = action->next;
-               for (;;) {
-                       for (; action; action = action->next) {
-                               seq_printf(p, ",%s %s",
-                                       (action->flags & IRQF_DISABLED) ? " +" : "",
-                                       action->name);
-                       }
-                       if (!sbusl)
-                               break;
-                       k++;
-                       if (k < 4) {
-                               action = sbus_actions[(j << 5) + (sbusl << 2) + k].action;
-                       } else {
-                               j++;
-                               if (j == nsbi)
-                                       break;
-                               k = 0;
-                               action = sbus_actions[(j << 5) + (sbusl << 2)].action;
+       unsigned int bus_mask;
+       unsigned int sbino, slot;
+       unsigned int sbil;
+
+       bus_mask = bw_get_intr_mask(sbusl) & 0x3ffff;
+       bw_clear_intr_mask(sbusl, bus_mask);
+
+       /* Bit offset of this SBUS level's 4 slot bits in the SBI register */
+       sbil = (sbusl << 2);
+       /* Loop for each pending SBI */
+       for (sbino = 0; bus_mask; sbino++, bus_mask >>= 1) {
+               unsigned int idx, mask;
+
+               /* Test bit 0 before shifting so that SBI 0 is not skipped */
+               if (!(bus_mask & 1))
+                       continue;
+               /* XXX This seems to ACK the irq twice.  acquire_sbi()
+                * XXX uses swap, therefore this writes 0xf << sbil,
+                * XXX then later release_sbi() will write the individual
+                * XXX bits which were set again.
+                */
+               mask = acquire_sbi(SBI2DEVID(sbino), 0xf << sbil);
+               mask &= (0xf << sbil);
+
+               /* Loop for each pending SBI slot; idx 0 is bit (1 << sbil).
+                * Advancing in the loop header keeps idx/slot in step even
+                * when an iteration is skipped with continue.
+                */
+               slot = (1 << sbil);
+               for (idx = 0; mask != 0; idx++, slot <<= 1) {
+                       unsigned int pil;
+                       struct irq_bucket *p;
+
+                       if (!(mask & slot))
+                               continue;
+
+                       mask &= ~slot;
+                       /* Pass the raw level: sun4d_encode_irq() shifts it */
+                       pil = sun4d_encode_irq(sbino, sbusl, idx);
+
+                       p = irq_map[pil];
+                       while (p) {
+                               struct irq_bucket *next;
+
+                               next = p->next;
+                               generic_handle_irq(p->irq);
+                               p = next;
                        }
+                       release_sbi(SBI2DEVID(sbino), slot);
                }
-               seq_putc(p, '\n');
        }
-out_unlock:
-       spin_unlock_irqrestore(&irq_action_lock, flags);
-       return 0;
-}
-
-void sun4d_free_irq(unsigned int irq, void *dev_id)
-{
-       struct irqaction *action, **actionp;
-       struct irqaction *tmp = NULL;
-       unsigned long flags;
-
-       spin_lock_irqsave(&irq_action_lock, flags);
-       if (irq < 15)
-               actionp = irq + irq_action;
-       else
-               actionp = &(sbus_actions[irq - (1 << 5)].action);
-       action = *actionp;
-       if (!action) {
-               printk(KERN_ERR "Trying to free free IRQ%d\n", irq);
-               goto out_unlock;
-       }
-       if (dev_id) {
-               for (; action; action = action->next) {
-                       if (action->dev_id == dev_id)
-                               break;
-                       tmp = action;
-               }
-               if (!action) {
-                       printk(KERN_ERR "Trying to free free shared IRQ%d\n",
-                              irq);
-                       goto out_unlock;
-               }
-       } else if (action->flags & IRQF_SHARED) {
-               printk(KERN_ERR "Trying to free shared IRQ%d with NULL device ID\n",
-                      irq);
-               goto out_unlock;
-       }
-       if (action->flags & SA_STATIC_ALLOC) {
-               /*
-                * This interrupt is marked as specially allocated
-                * so it is a bad idea to free it.
-                */
-               printk(KERN_ERR "Attempt to free statically allocated IRQ%d (%s)\n",
-                      irq, action->name);
-               goto out_unlock;
-       }
-
-       if (tmp)
-               tmp->next = action->next;
-       else
-               *actionp = action->next;
-
-       spin_unlock_irqrestore(&irq_action_lock, flags);
-
-       synchronize_irq(irq);
-
-       spin_lock_irqsave(&irq_action_lock, flags);
-
-       kfree(action);
-
-       if (!(*actionp))
-               __disable_irq(irq);
-
-out_unlock:
-       spin_unlock_irqrestore(&irq_action_lock, flags);
 }
 
 void sun4d_handler_irq(int pil, struct pt_regs *regs)
 {
        struct pt_regs *old_regs;
-       struct irqaction *action;
-       int cpu = smp_processor_id();
        /* SBUS IRQ level (1 - 7) */
        int sbusl = pil_to_sbus[pil];
 
@@ -233,160 +156,96 @@ void sun4d_handler_irq(int pil, struct pt_regs *regs)
 
        cc_set_iclr(1 << pil);
 
+#ifdef CONFIG_SMP
+       /*
+        * Check IPI data structures after IRQ has been cleared. Hard and Soft
+        * IRQ can happen at the same time, so both cases are always handled.
+        */
+       if (pil == SUN4D_IPI_IRQ)
+               sun4d_ipi_interrupt();
+#endif
+
        old_regs = set_irq_regs(regs);
        irq_enter();
-       kstat_cpu(cpu).irqs[pil]++;
-       if (!sbusl) {
-               action = *(pil + irq_action);
-               if (!action)
-                       unexpected_irq(pil, NULL, regs);
-               do {
-                       action->handler(pil, action->dev_id);
-                       action = action->next;
-               } while (action);
+       if (sbusl == 0) {
+               /* cpu interrupt */
+               struct irq_bucket *p;
+
+               p = irq_map[pil];
+               while (p) {
+                       struct irq_bucket *next;
+
+                       next = p->next;
+                       generic_handle_irq(p->irq);
+                       p = next;
+               }
        } else {
-               int bus_mask = bw_get_intr_mask(sbusl) & 0x3ffff;
-               int sbino;
-               struct sbus_action *actionp;
-               unsigned mask, slot;
-               int sbil = (sbusl << 2);
-
-               bw_clear_intr_mask(sbusl, bus_mask);
-
-               /* Loop for each pending SBI */
-               for (sbino = 0; bus_mask; sbino++, bus_mask >>= 1)
-                       if (bus_mask & 1) {
-                               mask = acquire_sbi(SBI2DEVID(sbino), 0xf << sbil);
-                               mask &= (0xf << sbil);
-                               actionp = sbus_actions + (sbino << 5) + (sbil);
-                               /* Loop for each pending SBI slot */
-                               for (slot = (1 << sbil); mask; slot <<= 1, actionp++)
-                                       if (mask & slot) {
-                                               mask &= ~slot;
-                                               action = actionp->action;
-
-                                               if (!action)
-                                                       unexpected_irq(pil, NULL, regs);
-                                               do {
-                                                       action->handler(pil, action->dev_id);
-                                                       action = action->next;
-                                               } while (action);
-                                               release_sbi(SBI2DEVID(sbino), slot);
-                                       }
-                       }
+               /* SBUS interrupt */
+               sun4d_sbus_handler_irq(sbusl);
        }
        irq_exit();
        set_irq_regs(old_regs);
 }
 
-int sun4d_request_irq(unsigned int irq,
-               irq_handler_t handler,
-               unsigned long irqflags, const char *devname, void *dev_id)
+
+static void sun4d_mask_irq(struct irq_data *data)
 {
-       struct irqaction *action, *tmp = NULL, **actionp;
+       struct sun4d_handler_data *handler_data = data->handler_data;
+       unsigned int real_irq;
+#ifdef CONFIG_SMP
+       int cpuid = handler_data->cpuid;
        unsigned long flags;
-       int ret;
-
-       if (irq > 14 && irq < (1 << 5)) {
-               ret = -EINVAL;
-               goto out;
-       }
-
-       if (!handler) {
-               ret = -EINVAL;
-               goto out;
-       }
-
-       spin_lock_irqsave(&irq_action_lock, flags);
-
-       if (irq >= (1 << 5))
-               actionp = &(sbus_actions[irq - (1 << 5)].action);
-       else
-               actionp = irq + irq_action;
-       action = *actionp;
-
-       if (action) {
-               if ((action->flags & IRQF_SHARED) && (irqflags & IRQF_SHARED)) {
-                       for (tmp = action; tmp->next; tmp = tmp->next)
-                               /* find last entry - tmp used below */;
-               } else {
-                       ret = -EBUSY;
-                       goto out_unlock;
-               }
-               if ((action->flags & IRQF_DISABLED) ^ (irqflags & IRQF_DISABLED)) {
-                       printk(KERN_ERR "Attempt to mix fast and slow interrupts on IRQ%d denied\n",
-                              irq);
-                       ret = -EBUSY;
-                       goto out_unlock;
-               }
-               action = NULL;          /* Or else! */
-       }
-
-       /* If this is flagged as statically allocated then we use our
-        * private struct which is never freed.
-        */
-       if (irqflags & SA_STATIC_ALLOC) {
-               if (static_irq_count < MAX_STATIC_ALLOC)
-                       action = &static_irqaction[static_irq_count++];
-               else
-                       printk(KERN_ERR "Request for IRQ%d (%s) SA_STATIC_ALLOC failed using kmalloc\n",
-                              irq, devname);
-       }
-
-       if (action == NULL)
-               action = kmalloc(sizeof(struct irqaction), GFP_ATOMIC);
-
-       if (!action) {
-               ret = -ENOMEM;
-               goto out_unlock;
-       }
-
-       action->handler = handler;
-       action->flags = irqflags;
-       action->name = devname;
-       action->next = NULL;
-       action->dev_id = dev_id;
-
-       if (tmp)
-               tmp->next = action;
-       else
-               *actionp = action;
-
-       __enable_irq(irq);
-
-       ret = 0;
-out_unlock:
-       spin_unlock_irqrestore(&irq_action_lock, flags);
-out:
-       return ret;
+#endif
+       real_irq = handler_data->real_irq;
+#ifdef CONFIG_SMP
+       spin_lock_irqsave(&sun4d_imsk_lock, flags);
+       cc_set_imsk_other(cpuid, cc_get_imsk_other(cpuid) | (1 << real_irq));
+       spin_unlock_irqrestore(&sun4d_imsk_lock, flags);
+#else
+       cc_set_imsk(cc_get_imsk() | (1 << real_irq));
+#endif
 }
 
-static void sun4d_disable_irq(unsigned int irq)
+/* Unmask (enable) an interrupt level by clearing its bit in the interrupt
+ * mask register; exact inverse of sun4d_mask_irq(), which sets the bit.
+ * On SMP the mask of the cpu recorded in the handler data is updated
+ * under sun4d_imsk_lock.
+ */
+static void sun4d_unmask_irq(struct irq_data *data)
 {
-       int tid = sbus_tid[(irq >> 5) - 1];
+       struct sun4d_handler_data *handler_data = data->handler_data;
+       unsigned int real_irq;
+#ifdef CONFIG_SMP
+       int cpuid = handler_data->cpuid;
        unsigned long flags;
+#endif
+       real_irq = handler_data->real_irq;
 
-       if (irq < NR_IRQS)
-               return;
-
+#ifdef CONFIG_SMP
        spin_lock_irqsave(&sun4d_imsk_lock, flags);
-       cc_set_imsk_other(tid, cc_get_imsk_other(tid) | (1 << sbus_to_pil[(irq >> 2) & 7]));
+       /* Clear only this level's bit; "| ~bit" would mask all the others */
+       cc_set_imsk_other(cpuid, cc_get_imsk_other(cpuid) & ~(1 << real_irq));
        spin_unlock_irqrestore(&sun4d_imsk_lock, flags);
+#else
+       cc_set_imsk(cc_get_imsk() & ~(1 << real_irq));
+#endif
 }
 
-static void sun4d_enable_irq(unsigned int irq)
+static unsigned int sun4d_startup_irq(struct irq_data *data)
 {
-       int tid = sbus_tid[(irq >> 5) - 1];
-       unsigned long flags;
-
-       if (irq < NR_IRQS)
-               return;
+       irq_link(data->irq);
+       sun4d_unmask_irq(data);
+       return 0;
+}
 
-       spin_lock_irqsave(&sun4d_imsk_lock, flags);
-       cc_set_imsk_other(tid, cc_get_imsk_other(tid) & ~(1 << sbus_to_pil[(irq >> 2) & 7]));
-       spin_unlock_irqrestore(&sun4d_imsk_lock, flags);
+static void sun4d_shutdown_irq(struct irq_data *data)
+{
+       sun4d_mask_irq(data);
+       irq_unlink(data->irq);
 }
 
+struct irq_chip sun4d_irq = {
+       .name           = "sun4d",
+       .irq_startup    = sun4d_startup_irq,
+       .irq_shutdown   = sun4d_shutdown_irq,
+       .irq_unmask     = sun4d_unmask_irq,
+       .irq_mask       = sun4d_mask_irq,
+};
+
 #ifdef CONFIG_SMP
 static void sun4d_set_cpu_int(int cpu, int level)
 {
@@ -413,7 +272,7 @@ void __init sun4d_distribute_irqs(void)
        for_each_node_by_name(dp, "sbi") {
                int devid = of_getintprop_default(dp, "device-id", 0);
                int board = of_getintprop_default(dp, "board#", 0);
-               sbus_tid[board] = cpuid;
+               board_to_cpu[board] = cpuid;
                set_sbi_tid(devid, cpuid << 3);
        }
        printk(KERN_ERR "All sbus IRQs directed to CPU%d\n", cpuid);
@@ -443,15 +302,16 @@ static void __init sun4d_load_profile_irqs(void)
 unsigned int sun4d_build_device_irq(struct platform_device *op,
                                     unsigned int real_irq)
 {
-       static int pil_to_sbus[] = {
-               0, 0, 1, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0, 0,
-       };
        struct device_node *dp = op->dev.of_node;
        struct device_node *io_unit, *sbi = dp->parent;
        const struct linux_prom_registers *regs;
+       struct sun4d_handler_data *handler_data;
+       unsigned int pil;
+       unsigned int irq;
        int board, slot;
        int sbusl;
 
+       irq = 0;
        while (sbi) {
                if (!strcmp(sbi->name, "sbi"))
                        break;
@@ -484,7 +344,28 @@ unsigned int sun4d_build_device_irq(struct platform_device *op,
 
        sbusl = pil_to_sbus[real_irq];
        if (sbusl)
-               return (((board + 1) << 5) + (sbusl << 2) + slot);
+               pil = sun4d_encode_irq(board, sbusl, slot);
+       else
+               pil = real_irq;
+
+       irq = irq_alloc(real_irq, pil);
+       if (irq == 0)
+               goto err_out;
+
+       handler_data = irq_get_handler_data(irq);
+       if (unlikely(handler_data))
+               goto err_out;
+
+       handler_data = kzalloc(sizeof(struct sun4d_handler_data), GFP_ATOMIC);
+       if (unlikely(!handler_data)) {
+               prom_printf("IRQ: kzalloc(sun4d_handler_data) failed.\n");
+               prom_halt();
+       }
+       handler_data->cpuid    = board_to_cpu[board];
+       handler_data->real_irq = real_irq;
+       irq_set_chip_and_handler_name(irq, &sun4d_irq,
+                                     handle_level_irq, "level");
+       irq_set_handler_data(irq, handler_data);
 
 err_out:
        return real_irq;
@@ -518,6 +399,7 @@ static void __init sun4d_init_timers(irq_handler_t counter_fn)
 {
        struct device_node *dp;
        struct resource res;
+       unsigned int irq;
        const u32 *reg;
        int err;
 
@@ -552,9 +434,8 @@ static void __init sun4d_init_timers(irq_handler_t counter_fn)
 
        master_l10_counter = &sun4d_timers->l10_cur_count;
 
-       err = request_irq(TIMER_IRQ, counter_fn,
-                         (IRQF_DISABLED | SA_STATIC_ALLOC),
-                         "timer", NULL);
+       irq = sun4d_build_device_irq(NULL, SUN4D_TIMER_IRQ);
+       err = request_irq(irq, counter_fn, IRQF_TIMER, "timer", NULL);
        if (err) {
                prom_printf("sun4d_init_timers: request_irq() failed with %d\n",
                             err);
@@ -567,27 +448,16 @@ static void __init sun4d_init_timers(irq_handler_t counter_fn)
 void __init sun4d_init_sbi_irq(void)
 {
        struct device_node *dp;
-       int target_cpu = 0;
+       int target_cpu;
 
-#ifdef CONFIG_SMP
        target_cpu = boot_cpu_id;
-#endif
-
-       nsbi = 0;
-       for_each_node_by_name(dp, "sbi")
-               nsbi++;
-       sbus_actions = kzalloc(nsbi * 8 * 4 * sizeof(struct sbus_action), GFP_ATOMIC);
-       if (!sbus_actions) {
-               prom_printf("SUN4D: Cannot allocate sbus_actions, halting.\n");
-               prom_halt();
-       }
        for_each_node_by_name(dp, "sbi") {
                int devid = of_getintprop_default(dp, "device-id", 0);
                int board = of_getintprop_default(dp, "board#", 0);
                unsigned int mask;
 
                set_sbi_tid(devid, target_cpu << 3);
-               sbus_tid[board] = target_cpu;
+               board_to_cpu[board] = target_cpu;
 
                /* Get rid of pending irqs from PROM */
                mask = acquire_sbi(devid, 0xffffffff);
@@ -603,12 +473,10 @@ void __init sun4d_init_IRQ(void)
 {
        local_irq_disable();
 
-       BTFIXUPSET_CALL(enable_irq, sun4d_enable_irq, BTFIXUPCALL_NORM);
-       BTFIXUPSET_CALL(disable_irq, sun4d_disable_irq, BTFIXUPCALL_NORM);
        BTFIXUPSET_CALL(clear_clock_irq, sun4d_clear_clock_irq, BTFIXUPCALL_NORM);
        BTFIXUPSET_CALL(load_profile_irq, sun4d_load_profile_irq, BTFIXUPCALL_NORM);
 
-       sparc_irq_config.init_timers = sun4d_init_timers;
+       sparc_irq_config.init_timers      = sun4d_init_timers;
        sparc_irq_config.build_device_irq = sun4d_build_device_irq;
 
 #ifdef CONFIG_SMP
index 475d50b..1333879 100644 (file)
@@ -32,6 +32,7 @@ static inline unsigned long sun4d_swap(volatile unsigned long *ptr, unsigned lon
        return val;
 }
 
+static void smp4d_ipi_init(void);
 static void smp_setup_percpu_timer(void);
 
 static unsigned char cpu_leds[32];
@@ -80,8 +81,6 @@ void __cpuinit smp4d_callin(void)
        local_flush_cache_all();
        local_flush_tlb_all();
 
-       cpu_probe();
-
        while ((unsigned long)current_set[cpuid] < PAGE_OFFSET)
                barrier();
 
@@ -105,7 +104,7 @@ void __cpuinit smp4d_callin(void)
 
        local_irq_enable();     /* We don't allow PIL 14 yet */
 
-       while (!cpu_isset(cpuid, smp_commenced_mask))
+       while (!cpumask_test_cpu(cpuid, &smp_commenced_mask))
                barrier();
 
        spin_lock_irqsave(&sun4d_imsk_lock, flags);
@@ -120,6 +119,7 @@ void __cpuinit smp4d_callin(void)
  */
 void __init smp4d_boot_cpus(void)
 {
+       smp4d_ipi_init();
        if (boot_cpu_id)
                current_set[0] = NULL;
        smp_setup_percpu_timer();
@@ -191,6 +191,80 @@ void __init smp4d_smp_done(void)
        sun4d_distribute_irqs();
 }
 
+/* Memory structure giving interrupt handler information about IPI generated */
+struct sun4d_ipi_work {
+       int single;
+       int msk;
+       int resched;
+};
+
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct sun4d_ipi_work, sun4d_ipi_work);
+
+/* Initialize IPIs on the SUN4D SMP machine */
+static void __init smp4d_ipi_init(void)
+{
+       int cpu;
+       struct sun4d_ipi_work *work;
+
+       printk(KERN_INFO "smp4d: setup IPI at IRQ %d\n", SUN4D_IPI_IRQ);
+
+       for_each_possible_cpu(cpu) {
+               work = &per_cpu(sun4d_ipi_work, cpu);
+               work->single = work->msk = work->resched = 0;
+       }
+}
+
+void sun4d_ipi_interrupt(void)
+{
+       struct sun4d_ipi_work *work = &__get_cpu_var(sun4d_ipi_work);
+
+       if (work->single) {
+               work->single = 0;
+               smp_call_function_single_interrupt();
+       }
+       if (work->msk) {
+               work->msk = 0;
+               smp_call_function_interrupt();
+       }
+       if (work->resched) {
+               work->resched = 0;
+               smp_resched_interrupt();
+       }
+}
+
+static void smp4d_ipi_single(int cpu)
+{
+       struct sun4d_ipi_work *work = &per_cpu(sun4d_ipi_work, cpu);
+
+       /* Mark work */
+       work->single = 1;
+
+       /* Generate IRQ on the CPU */
+       sun4d_send_ipi(cpu, SUN4D_IPI_IRQ);
+}
+
+static void smp4d_ipi_mask_one(int cpu)
+{
+       struct sun4d_ipi_work *work = &per_cpu(sun4d_ipi_work, cpu);
+
+       /* Mark work */
+       work->msk = 1;
+
+       /* Generate IRQ on the CPU */
+       sun4d_send_ipi(cpu, SUN4D_IPI_IRQ);
+}
+
+static void smp4d_ipi_resched(int cpu)
+{
+       struct sun4d_ipi_work *work = &per_cpu(sun4d_ipi_work, cpu);
+
+       /* Mark work */
+       work->resched = 1;
+
+       /* Generate IRQ on the CPU (any IRQ will cause resched) */
+       sun4d_send_ipi(cpu, SUN4D_IPI_IRQ);
+}
+
 static struct smp_funcall {
        smpfunc_t func;
        unsigned long arg1;
@@ -239,10 +313,10 @@ static void smp4d_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
                {
                        register int i;
 
-                       cpu_clear(smp_processor_id(), mask);
-                       cpus_and(mask, cpu_online_map, mask);
+                       cpumask_clear_cpu(smp_processor_id(), &mask);
+                       cpumask_and(&mask, cpu_online_mask, &mask);
                        for (i = 0; i <= high; i++) {
-                               if (cpu_isset(i, mask)) {
+                               if (cpumask_test_cpu(i, &mask)) {
                                        ccall_info.processors_in[i] = 0;
                                        ccall_info.processors_out[i] = 0;
                                        sun4d_send_ipi(i, IRQ_CROSS_CALL);
@@ -255,7 +329,7 @@ static void smp4d_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
 
                        i = 0;
                        do {
-                               if (!cpu_isset(i, mask))
+                               if (!cpumask_test_cpu(i, &mask))
                                        continue;
                                while (!ccall_info.processors_in[i])
                                        barrier();
@@ -263,7 +337,7 @@ static void smp4d_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
 
                        i = 0;
                        do {
-                               if (!cpu_isset(i, mask))
+                               if (!cpumask_test_cpu(i, &mask))
                                        continue;
                                while (!ccall_info.processors_out[i])
                                        barrier();
@@ -356,6 +430,9 @@ void __init sun4d_init_smp(void)
        BTFIXUPSET_BLACKBOX(load_current, smp4d_blackbox_current);
        BTFIXUPSET_CALL(smp_cross_call, smp4d_cross_call, BTFIXUPCALL_NORM);
        BTFIXUPSET_CALL(__hard_smp_processor_id, __smp4d_processor_id, BTFIXUPCALL_NORM);
+       BTFIXUPSET_CALL(smp_ipi_resched, smp4d_ipi_resched, BTFIXUPCALL_NORM);
+       BTFIXUPSET_CALL(smp_ipi_single, smp4d_ipi_single, BTFIXUPCALL_NORM);
+       BTFIXUPSET_CALL(smp_ipi_mask_one, smp4d_ipi_mask_one, BTFIXUPCALL_NORM);
 
        for (i = 0; i < NR_CPUS; i++) {
                ccall_info.processors_in[i] = 1;
index 69df625..422c16d 100644 (file)
 struct sun4m_irq_percpu __iomem *sun4m_irq_percpu[SUN4M_NCPUS];
 struct sun4m_irq_global __iomem *sun4m_irq_global;
 
+struct sun4m_handler_data {
+       bool    percpu;
+       long    mask;
+};
+
 /* Dave Redman (djhr@tadpole.co.uk)
  * The sun4m interrupt registers.
  */
@@ -142,9 +147,9 @@ struct sun4m_irq_global __iomem *sun4m_irq_global;
 #define        OBP_INT_LEVEL_VME       0x40
 
 #define SUN4M_TIMER_IRQ         (OBP_INT_LEVEL_ONBOARD | 10)
-#define SUM4M_PROFILE_IRQ       (OBP_INT_LEVEL_ONBOARD | 14)
+#define SUN4M_PROFILE_IRQ       (OBP_INT_LEVEL_ONBOARD | 14)
 
-static unsigned long irq_mask[0x50] = {
+static unsigned long sun4m_imask[0x50] = {
        /* 0x00 - SMP */
        0,  SUN4M_SOFT_INT(1),
        SUN4M_SOFT_INT(2),  SUN4M_SOFT_INT(3),
@@ -169,7 +174,7 @@ static unsigned long irq_mask[0x50] = {
        SUN4M_INT_VIDEO, SUN4M_INT_MODULE,
        SUN4M_INT_REALTIME, SUN4M_INT_FLOPPY,
        (SUN4M_INT_SERIAL | SUN4M_INT_KBDMS),
-       SUN4M_INT_AUDIO, 0, SUN4M_INT_MODULE_ERR,
+       SUN4M_INT_AUDIO, SUN4M_INT_E14, SUN4M_INT_MODULE_ERR,
        /* 0x30 - sbus */
        0, 0, SUN4M_INT_SBUS(0), SUN4M_INT_SBUS(1),
        0, SUN4M_INT_SBUS(2), 0, SUN4M_INT_SBUS(3),
@@ -182,105 +187,110 @@ static unsigned long irq_mask[0x50] = {
        0, SUN4M_INT_VME(6), 0, 0
 };
 
-static unsigned long sun4m_get_irqmask(unsigned int irq)
+static void sun4m_mask_irq(struct irq_data *data)
 {
-       unsigned long mask;
-
-       if (irq < 0x50)
-               mask = irq_mask[irq];
-       else
-               mask = 0;
+       struct sun4m_handler_data *handler_data = data->handler_data;
+       int cpu = smp_processor_id();
 
-       if (!mask)
-               printk(KERN_ERR "sun4m_get_irqmask: IRQ%d has no valid mask!\n",
-                      irq);
+       if (handler_data->mask) {
+               unsigned long flags;
 
-       return mask;
+               local_irq_save(flags);
+               if (handler_data->percpu) {
+                       sbus_writel(handler_data->mask, &sun4m_irq_percpu[cpu]->set);
+               } else {
+                       sbus_writel(handler_data->mask, &sun4m_irq_global->mask_set);
+               }
+               local_irq_restore(flags);
+       }
 }
 
-static void sun4m_disable_irq(unsigned int irq_nr)
+static void sun4m_unmask_irq(struct irq_data *data)
 {
-       unsigned long mask, flags;
+       struct sun4m_handler_data *handler_data = data->handler_data;
        int cpu = smp_processor_id();
 
-       mask = sun4m_get_irqmask(irq_nr);
-       local_irq_save(flags);
-       if (irq_nr > 15)
-               sbus_writel(mask, &sun4m_irq_global->mask_set);
-       else
-               sbus_writel(mask, &sun4m_irq_percpu[cpu]->set);
-       local_irq_restore(flags);
-}
-
-static void sun4m_enable_irq(unsigned int irq_nr)
-{
-       unsigned long mask, flags;
-       int cpu = smp_processor_id();
+       if (handler_data->mask) {
+               unsigned long flags;
 
-       /* Dreadful floppy hack. When we use 0x2b instead of
-        * 0x0b the system blows (it starts to whistle!).
-        * So we continue to use 0x0b. Fixme ASAP. --P3
-        */
-       if (irq_nr != 0x0b) {
-               mask = sun4m_get_irqmask(irq_nr);
-               local_irq_save(flags);
-               if (irq_nr > 15)
-                       sbus_writel(mask, &sun4m_irq_global->mask_clear);
-               else
-                       sbus_writel(mask, &sun4m_irq_percpu[cpu]->clear);
-               local_irq_restore(flags);
-       } else {
                local_irq_save(flags);
-               sbus_writel(SUN4M_INT_FLOPPY, &sun4m_irq_global->mask_clear);
+               if (handler_data->percpu) {
+                       sbus_writel(handler_data->mask, &sun4m_irq_percpu[cpu]->clear);
+               } else {
+                       sbus_writel(handler_data->mask, &sun4m_irq_global->mask_clear);
+               }
                local_irq_restore(flags);
        }
 }
 
-static unsigned long cpu_pil_to_imask[16] = {
-/*0*/  0x00000000,
-/*1*/  0x00000000,
-/*2*/  SUN4M_INT_SBUS(0) | SUN4M_INT_VME(0),
-/*3*/  SUN4M_INT_SBUS(1) | SUN4M_INT_VME(1),
-/*4*/  SUN4M_INT_SCSI,
-/*5*/  SUN4M_INT_SBUS(2) | SUN4M_INT_VME(2),
-/*6*/  SUN4M_INT_ETHERNET,
-/*7*/  SUN4M_INT_SBUS(3) | SUN4M_INT_VME(3),
-/*8*/  SUN4M_INT_VIDEO,
-/*9*/  SUN4M_INT_SBUS(4) | SUN4M_INT_VME(4) | SUN4M_INT_MODULE_ERR,
-/*10*/ SUN4M_INT_REALTIME,
-/*11*/ SUN4M_INT_SBUS(5) | SUN4M_INT_VME(5) | SUN4M_INT_FLOPPY,
-/*12*/ SUN4M_INT_SERIAL  | SUN4M_INT_KBDMS,
-/*13*/ SUN4M_INT_SBUS(6) | SUN4M_INT_VME(6) | SUN4M_INT_AUDIO,
-/*14*/ SUN4M_INT_E14,
-/*15*/ SUN4M_INT_ERROR,
-};
+static unsigned int sun4m_startup_irq(struct irq_data *data)
+{
+       irq_link(data->irq);
+       sun4m_unmask_irq(data);
+       return 0;
+}
 
-/* We assume the caller has disabled local interrupts when these are called,
- * or else very bizarre behavior will result.
- */
-static void sun4m_disable_pil_irq(unsigned int pil)
+static void sun4m_shutdown_irq(struct irq_data *data)
 {
-       sbus_writel(cpu_pil_to_imask[pil], &sun4m_irq_global->mask_set);
+       sun4m_mask_irq(data);
+       irq_unlink(data->irq);
 }
 
-static void sun4m_enable_pil_irq(unsigned int pil)
+static struct irq_chip sun4m_irq = {
+       .name           = "sun4m",
+       .irq_startup    = sun4m_startup_irq,
+       .irq_shutdown   = sun4m_shutdown_irq,
+       .irq_mask       = sun4m_mask_irq,
+       .irq_unmask     = sun4m_unmask_irq,
+};
+
+
+static unsigned int sun4m_build_device_irq(struct platform_device *op,
+                                          unsigned int real_irq)
 {
-       sbus_writel(cpu_pil_to_imask[pil], &sun4m_irq_global->mask_clear);
+       struct sun4m_handler_data *handler_data;
+       unsigned int irq;
+       unsigned int pil;
+
+       if (real_irq >= OBP_INT_LEVEL_VME) {
+               prom_printf("Bogus sun4m IRQ %u\n", real_irq);
+               prom_halt();
+       }
+       pil = (real_irq & 0xf);
+       irq = irq_alloc(real_irq, pil);
+
+       if (irq == 0)
+               goto out;
+
+       handler_data = irq_get_handler_data(irq);
+       if (unlikely(handler_data))
+               goto out;
+
+       handler_data = kzalloc(sizeof(struct sun4m_handler_data), GFP_ATOMIC);
+       if (unlikely(!handler_data)) {
+               prom_printf("IRQ: kzalloc(sun4m_handler_data) failed.\n");
+               prom_halt();
+       }
+
+       handler_data->mask = sun4m_imask[real_irq];
+       handler_data->percpu = real_irq < OBP_INT_LEVEL_ONBOARD;
+       irq_set_chip_and_handler_name(irq, &sun4m_irq,
+                                     handle_level_irq, "level");
+       irq_set_handler_data(irq, handler_data);
+
+out:
+       return irq;
 }
 
 #ifdef CONFIG_SMP
 static void sun4m_send_ipi(int cpu, int level)
 {
-       unsigned long mask = sun4m_get_irqmask(level);
-
-       sbus_writel(mask, &sun4m_irq_percpu[cpu]->set);
+       sbus_writel(SUN4M_SOFT_INT(level), &sun4m_irq_percpu[cpu]->set);
 }
 
 static void sun4m_clear_ipi(int cpu, int level)
 {
-       unsigned long mask = sun4m_get_irqmask(level);
-
-       sbus_writel(mask, &sun4m_irq_percpu[cpu]->clear);
+       sbus_writel(SUN4M_SOFT_INT(level), &sun4m_irq_percpu[cpu]->clear);
 }
 
 static void sun4m_set_udt(int cpu)
@@ -343,7 +353,15 @@ void sun4m_nmi(struct pt_regs *regs)
        prom_halt();
 }
 
-/* Exported for sun4m_smp.c */
+void sun4m_unmask_profile_irq(void)
+{
+       unsigned long flags;
+
+       local_irq_save(flags);
+       sbus_writel(sun4m_imask[SUN4M_PROFILE_IRQ], &sun4m_irq_global->mask_clear);
+       local_irq_restore(flags);
+}
+
 void sun4m_clear_profile_irq(int cpu)
 {
        sbus_readl(&timers_percpu[cpu]->l14_limit);
@@ -358,6 +376,7 @@ static void __init sun4m_init_timers(irq_handler_t counter_fn)
 {
        struct device_node *dp = of_find_node_by_name(NULL, "counter");
        int i, err, len, num_cpu_timers;
+       unsigned int irq;
        const u32 *addr;
 
        if (!dp) {
@@ -384,8 +403,9 @@ static void __init sun4m_init_timers(irq_handler_t counter_fn)
 
        master_l10_counter = &timers_global->l10_count;
 
-       err = request_irq(SUN4M_TIMER_IRQ, counter_fn,
-                         (IRQF_DISABLED | SA_STATIC_ALLOC), "timer", NULL);
+       irq = sun4m_build_device_irq(NULL, SUN4M_TIMER_IRQ);
+
+       err = request_irq(irq, counter_fn, IRQF_TIMER, "timer", NULL);
        if (err) {
                printk(KERN_ERR "sun4m_init_timers: Register IRQ error %d.\n",
                        err);
@@ -452,14 +472,11 @@ void __init sun4m_init_IRQ(void)
        if (num_cpu_iregs == 4)
                sbus_writel(0, &sun4m_irq_global->interrupt_target);
 
-       BTFIXUPSET_CALL(enable_irq, sun4m_enable_irq, BTFIXUPCALL_NORM);
-       BTFIXUPSET_CALL(disable_irq, sun4m_disable_irq, BTFIXUPCALL_NORM);
-       BTFIXUPSET_CALL(enable_pil_irq, sun4m_enable_pil_irq, BTFIXUPCALL_NORM);
-       BTFIXUPSET_CALL(disable_pil_irq, sun4m_disable_pil_irq, BTFIXUPCALL_NORM);
        BTFIXUPSET_CALL(clear_clock_irq, sun4m_clear_clock_irq, BTFIXUPCALL_NORM);
        BTFIXUPSET_CALL(load_profile_irq, sun4m_load_profile_irq, BTFIXUPCALL_NORM);
 
        sparc_irq_config.init_timers = sun4m_init_timers;
+       sparc_irq_config.build_device_irq = sun4m_build_device_irq;
 
 #ifdef CONFIG_SMP
        BTFIXUPSET_CALL(set_cpu_int, sun4m_send_ipi, BTFIXUPCALL_NORM);
index 5cc7dc5..5947686 100644 (file)
@@ -15,6 +15,9 @@
 #include "irq.h"
 #include "kernel.h"
 
+#define IRQ_IPI_SINGLE         12
+#define IRQ_IPI_MASK           13
+#define IRQ_IPI_RESCHED                14
 #define IRQ_CROSS_CALL         15
 
 static inline unsigned long
@@ -26,6 +29,7 @@ swap_ulong(volatile unsigned long *ptr, unsigned long val)
        return val;
 }
 
+static void smp4m_ipi_init(void);
 static void smp_setup_percpu_timer(void);
 
 void __cpuinit smp4m_callin(void)
@@ -59,8 +63,6 @@ void __cpuinit smp4m_callin(void)
        local_flush_cache_all();
        local_flush_tlb_all();
 
-       cpu_probe();
-
        /* Fix idle thread fields. */
        __asm__ __volatile__("ld [%0], %%g6\n\t"
                             : : "r" (&current_set[cpuid])
@@ -70,7 +72,7 @@ void __cpuinit smp4m_callin(void)
        atomic_inc(&init_mm.mm_count);
        current->active_mm = &init_mm;
 
-       while (!cpu_isset(cpuid, smp_commenced_mask))
+       while (!cpumask_test_cpu(cpuid, &smp_commenced_mask))
                mb();
 
        local_irq_enable();
@@ -83,6 +85,7 @@ void __cpuinit smp4m_callin(void)
  */
 void __init smp4m_boot_cpus(void)
 {
+       smp4m_ipi_init();
        smp_setup_percpu_timer();
        local_flush_cache_all();
 }
@@ -150,18 +153,25 @@ void __init smp4m_smp_done(void)
        /* Ok, they are spinning and ready to go. */
 }
 
-/* At each hardware IRQ, we get this called to forward IRQ reception
- * to the next processor.  The caller must disable the IRQ level being
- * serviced globally so that there are no double interrupts received.
- *
- * XXX See sparc64 irq.c.
- */
-void smp4m_irq_rotate(int cpu)
+
+/* Initialize IPIs on the SUN4M SMP machine */
+static void __init smp4m_ipi_init(void)
+{
+}
+
+static void smp4m_ipi_resched(int cpu)
+{
+       set_cpu_int(cpu, IRQ_IPI_RESCHED);
+}
+
+static void smp4m_ipi_single(int cpu)
 {
-       int next = cpu_data(cpu).next;
+       set_cpu_int(cpu, IRQ_IPI_SINGLE);
+}
 
-       if (next != cpu)
-               set_irq_udt(next);
+static void smp4m_ipi_mask_one(int cpu)
+{
+       set_cpu_int(cpu, IRQ_IPI_MASK);
 }
 
 static struct smp_funcall {
@@ -199,10 +209,10 @@ static void smp4m_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
                {
                        register int i;
 
-                       cpu_clear(smp_processor_id(), mask);
-                       cpus_and(mask, cpu_online_map, mask);
+                       cpumask_clear_cpu(smp_processor_id(), &mask);
+                       cpumask_and(&mask, cpu_online_mask, &mask);
                        for (i = 0; i < ncpus; i++) {
-                               if (cpu_isset(i, mask)) {
+                               if (cpumask_test_cpu(i, &mask)) {
                                        ccall_info.processors_in[i] = 0;
                                        ccall_info.processors_out[i] = 0;
                                        set_cpu_int(i, IRQ_CROSS_CALL);
@@ -218,7 +228,7 @@ static void smp4m_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
 
                        i = 0;
                        do {
-                               if (!cpu_isset(i, mask))
+                               if (!cpumask_test_cpu(i, &mask))
                                        continue;
                                while (!ccall_info.processors_in[i])
                                        barrier();
@@ -226,7 +236,7 @@ static void smp4m_cross_call(smpfunc_t func, cpumask_t mask, unsigned long arg1,
 
                        i = 0;
                        do {
-                               if (!cpu_isset(i, mask))
+                               if (!cpumask_test_cpu(i, &mask))
                                        continue;
                                while (!ccall_info.processors_out[i])
                                        barrier();
@@ -277,7 +287,7 @@ static void __cpuinit smp_setup_percpu_timer(void)
        load_profile_irq(cpu, lvl14_resolution);
 
        if (cpu == boot_cpu_id)
-               enable_pil_irq(14);
+               sun4m_unmask_profile_irq();
 }
 
 static void __init smp4m_blackbox_id(unsigned *addr)
@@ -306,4 +316,7 @@ void __init sun4m_init_smp(void)
        BTFIXUPSET_BLACKBOX(load_current, smp4m_blackbox_current);
        BTFIXUPSET_CALL(smp_cross_call, smp4m_cross_call, BTFIXUPCALL_NORM);
        BTFIXUPSET_CALL(__hard_smp_processor_id, __smp4m_processor_id, BTFIXUPCALL_NORM);
+       BTFIXUPSET_CALL(smp_ipi_resched, smp4m_ipi_resched, BTFIXUPCALL_NORM);
+       BTFIXUPSET_CALL(smp_ipi_single, smp4m_ipi_single, BTFIXUPCALL_NORM);
+       BTFIXUPSET_CALL(smp_ipi_mask_one, smp4m_ipi_mask_one, BTFIXUPCALL_NORM);
 }
index 1eb8b00..7408201 100644 (file)
@@ -103,9 +103,10 @@ static unsigned long run_on_cpu(unsigned long cpu,
                                unsigned long (*func)(unsigned long),
                                unsigned long arg)
 {
-       cpumask_t old_affinity = current->cpus_allowed;
+       cpumask_t old_affinity;
        unsigned long ret;
 
+       cpumask_copy(&old_affinity, tsk_cpus_allowed(current));
        /* should return -EINVAL to userspace */
        if (set_cpus_allowed_ptr(current, cpumask_of(cpu)))
                return 0;
index 96046a4..1060e06 100644 (file)
@@ -228,14 +228,10 @@ static void __init sbus_time_init(void)
 
 void __init time_init(void)
 {
-#ifdef CONFIG_PCI
-       extern void pci_time_init(void);
-       if (pcic_present()) {
+       if (pcic_present())
                pci_time_init();
-               return;
-       }
-#endif
-       sbus_time_init();
+       else
+               sbus_time_init();
 }
 
 
index 8f982b7..531d54f 100644 (file)
@@ -237,7 +237,7 @@ static unsigned int us2e_freq_get(unsigned int cpu)
        if (!cpu_online(cpu))
                return 0;
 
-       cpus_allowed = current->cpus_allowed;
+       cpumask_copy(&cpus_allowed, tsk_cpus_allowed(current));
        set_cpus_allowed_ptr(current, cpumask_of(cpu));
 
        clock_tick = sparc64_get_clock_tick(cpu) / 1000;
@@ -258,7 +258,7 @@ static void us2e_set_cpu_divider_index(unsigned int cpu, unsigned int index)
        if (!cpu_online(cpu))
                return;
 
-       cpus_allowed = current->cpus_allowed;
+       cpumask_copy(&cpus_allowed, tsk_cpus_allowed(current));
        set_cpus_allowed_ptr(current, cpumask_of(cpu));
 
        new_freq = clock_tick = sparc64_get_clock_tick(cpu) / 1000;
index f35d1e7..9a8ceb7 100644 (file)
@@ -85,7 +85,7 @@ static unsigned int us3_freq_get(unsigned int cpu)
        if (!cpu_online(cpu))
                return 0;
 
-       cpus_allowed = current->cpus_allowed;
+       cpumask_copy(&cpus_allowed, tsk_cpus_allowed(current));
        set_cpus_allowed_ptr(current, cpumask_of(cpu));
 
        reg = read_safari_cfg();
@@ -105,7 +105,7 @@ static void us3_set_cpu_divider_index(unsigned int cpu, unsigned int index)
        if (!cpu_online(cpu))
                return;
 
-       cpus_allowed = current->cpus_allowed;
+       cpumask_copy(&cpus_allowed, tsk_cpus_allowed(current));
        set_cpus_allowed_ptr(current, cpumask_of(cpu));
 
        new_freq = sparc64_get_clock_tick(cpu) / 1000;
index 846d1c4..7f01b8f 100644 (file)
@@ -15,7 +15,6 @@ lib-$(CONFIG_SPARC32) += divdi3.o udivdi3.o
 lib-$(CONFIG_SPARC32) += copy_user.o locks.o
 lib-y                 += atomic_$(BITS).o
 lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o
-lib-$(CONFIG_SPARC32) += rwsem_32.o
 lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
 
 lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
diff --git a/arch/sparc/lib/rwsem_32.S b/arch/sparc/lib/rwsem_32.S
deleted file mode 100644 (file)
index 9675268..0000000
+++ /dev/null
@@ -1,204 +0,0 @@
-/*
- * Assembly part of rw semaphores.
- *
- * Copyright (C) 1999 Jakub Jelinek (jakub@redhat.com)
- */
-
-#include <asm/ptrace.h>
-#include <asm/psr.h>
-
-       .section .sched.text, "ax"
-       .align  4
-
-       .globl          ___down_read
-___down_read:
-       rd              %psr, %g3
-       nop
-       nop
-       nop
-       or              %g3, PSR_PIL, %g7
-       wr              %g7, 0, %psr
-       nop
-       nop
-       nop
-#ifdef CONFIG_SMP
-1:     ldstub          [%g1 + 4], %g7
-       tst             %g7
-       bne             1b
-        ld             [%g1], %g7
-       sub             %g7, 1, %g7
-       st              %g7, [%g1]
-       stb             %g0, [%g1 + 4]
-#else
-       ld              [%g1], %g7
-       sub             %g7, 1, %g7
-       st              %g7, [%g1]
-#endif
-       wr              %g3, 0, %psr
-       add             %g7, 1, %g7
-       nop
-       nop
-       subcc           %g7, 1, %g7
-       bneg            3f
-        nop
-2:     jmpl            %o7, %g0
-        mov            %g4, %o7
-3:     save            %sp, -64, %sp
-       mov             %g1, %l1
-       mov             %g4, %l4
-       bcs             4f
-        mov            %g5, %l5
-       call            down_read_failed
-        mov            %l1, %o0
-       mov             %l1, %g1
-       mov             %l4, %g4
-       ba              ___down_read
-        restore        %l5, %g0, %g5
-4:     call            down_read_failed_biased
-        mov            %l1, %o0
-       mov             %l1, %g1
-       mov             %l4, %g4
-       ba              2b
-        restore        %l5, %g0, %g5
-
-       .globl          ___down_write
-___down_write:
-       rd              %psr, %g3
-       nop
-       nop
-       nop
-       or              %g3, PSR_PIL, %g7
-       wr              %g7, 0, %psr
-       sethi           %hi(0x01000000), %g2
-       nop
-       nop
-#ifdef CONFIG_SMP
-1:     ldstub          [%g1 + 4], %g7
-       tst             %g7
-       bne             1b
-        ld             [%g1], %g7
-       sub             %g7, %g2, %g7
-       st              %g7, [%g1]
-       stb             %g0, [%g1 + 4]
-#else
-       ld              [%g1], %g7
-       sub             %g7, %g2, %g7
-       st              %g7, [%g1]
-#endif
-       wr              %g3, 0, %psr
-       add             %g7, %g2, %g7
-       nop
-       nop
-       subcc           %g7, %g2, %g7
-       bne             3f
-        nop
-2:     jmpl            %o7, %g0
-        mov            %g4, %o7
-3:     save            %sp, -64, %sp
-       mov             %g1, %l1
-       mov             %g4, %l4
-       bcs             4f
-        mov            %g5, %l5
-       call            down_write_failed
-        mov            %l1, %o0
-       mov             %l1, %g1
-       mov             %l4, %g4
-       ba              ___down_write
-        restore        %l5, %g0, %g5
-4:     call            down_write_failed_biased
-        mov            %l1, %o0
-       mov             %l1, %g1
-       mov             %l4, %g4
-       ba              2b
-        restore        %l5, %g0, %g5
-
-       .text
-       .globl          ___up_read
-___up_read:
-       rd              %psr, %g3
-       nop
-       nop
-       nop
-       or              %g3, PSR_PIL, %g7
-       wr              %g7, 0, %psr
-       nop
-       nop
-       nop
-#ifdef CONFIG_SMP
-1:     ldstub          [%g1 + 4], %g7
-       tst             %g7
-       bne             1b
-        ld             [%g1], %g7
-       add             %g7, 1, %g7
-       st              %g7, [%g1]
-       stb             %g0, [%g1 + 4]
-#else
-       ld              [%g1], %g7
-       add             %g7, 1, %g7
-       st              %g7, [%g1]
-#endif
-       wr              %g3, 0, %psr
-       nop
-       nop
-       nop
-       cmp             %g7, 0
-       be              3f
-        nop
-2:     jmpl            %o7, %g0
-        mov            %g4, %o7
-3:     save            %sp, -64, %sp
-       mov             %g1, %l1
-       mov             %g4, %l4
-       mov             %g5, %l5
-       clr             %o1
-       call            __rwsem_wake
-        mov            %l1, %o0
-       mov             %l1, %g1
-       mov             %l4, %g4
-       ba              2b
-        restore        %l5, %g0, %g5
-
-       .globl          ___up_write
-___up_write:
-       rd              %psr, %g3
-       nop
-       nop
-       nop
-       or              %g3, PSR_PIL, %g7
-       wr              %g7, 0, %psr
-       sethi           %hi(0x01000000), %g2
-       nop
-       nop
-#ifdef CONFIG_SMP
-1:     ldstub          [%g1 + 4], %g7
-       tst             %g7
-       bne             1b
-        ld             [%g1], %g7
-       add             %g7, %g2, %g7
-       st              %g7, [%g1]
-       stb             %g0, [%g1 + 4]
-#else
-       ld              [%g1], %g7
-       add             %g7, %g2, %g7
-       st              %g7, [%g1]
-#endif
-       wr              %g3, 0, %psr
-       sub             %g7, %g2, %g7
-       nop
-       nop
-       addcc           %g7, %g2, %g7
-       bcs             3f
-        nop
-2:     jmpl            %o7, %g0
-        mov            %g4, %o7
-3:     save            %sp, -64, %sp
-       mov             %g1, %l1
-       mov             %g4, %l4
-       mov             %g5, %l5
-       mov             %g7, %o1
-       call            __rwsem_wake
-        mov            %l1, %o0
-       mov             %l1, %g1
-       mov             %l4, %g4
-       ba              2b
-        restore        %l5, %g0, %g5
index 2f6ae1d..e10cd03 100644 (file)
@@ -862,7 +862,7 @@ static void init_node_masks_nonnuma(void)
        for (i = 0; i < NR_CPUS; i++)
                numa_cpu_lookup_table[i] = 0;
 
-       numa_cpumask_lookup_table[0] = CPU_MASK_ALL;
+       cpumask_setall(&numa_cpumask_lookup_table[0]);
 }
 
 #ifdef CONFIG_NEED_MULTIPLE_NODES
@@ -1080,7 +1080,7 @@ static void __init numa_parse_mdesc_group_cpus(struct mdesc_handle *md,
 {
        u64 arc;
 
-       cpus_clear(*mask);
+       cpumask_clear(mask);
 
        mdesc_for_each_arc(arc, md, grp, MDESC_ARC_TYPE_BACK) {
                u64 target = mdesc_arc_target(md, arc);
@@ -1091,7 +1091,7 @@ static void __init numa_parse_mdesc_group_cpus(struct mdesc_handle *md,
                        continue;
                id = mdesc_get_property(md, target, "id", NULL);
                if (*id < nr_cpu_ids)
-                       cpu_set(*id, *mask);
+                       cpumask_set_cpu(*id, mask);
        }
 }
 
@@ -1153,13 +1153,13 @@ static int __init numa_parse_mdesc_group(struct mdesc_handle *md, u64 grp,
 
        numa_parse_mdesc_group_cpus(md, grp, &mask);
 
-       for_each_cpu_mask(cpu, mask)
+       for_each_cpu(cpu, &mask)
                numa_cpu_lookup_table[cpu] = index;
-       numa_cpumask_lookup_table[index] = mask;
+       cpumask_copy(&numa_cpumask_lookup_table[index], &mask);
 
        if (numa_debug) {
                printk(KERN_INFO "NUMA GROUP[%d]: cpus [ ", index);
-               for_each_cpu_mask(cpu, mask)
+               for_each_cpu(cpu, &mask)
                        printk("%d ", cpu);
                printk("]\n");
        }
@@ -1218,7 +1218,7 @@ static int __init numa_parse_jbus(void)
        index = 0;
        for_each_present_cpu(cpu) {
                numa_cpu_lookup_table[cpu] = index;
-               numa_cpumask_lookup_table[index] = cpumask_of_cpu(cpu);
+               cpumask_copy(&numa_cpumask_lookup_table[index], cpumask_of(cpu));
                node_masks[index].mask = ~((1UL << 36UL) - 1UL);
                node_masks[index].val = cpu << 36UL;
 
index cbc70a2..c8b4162 100644 (file)
@@ -254,7 +254,7 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
 }
 #endif
 
-static int disable_smep __initdata;
+static int disable_smep __cpuinitdata;
 static __init int setup_disable_smep(char *arg)
 {
        disable_smep = 1;
@@ -262,7 +262,7 @@ static __init int setup_disable_smep(char *arg)
 }
 __setup("nosmep", setup_disable_smep);
 
-static __init void setup_smep(struct cpuinfo_x86 *c)
+static __cpuinit void setup_smep(struct cpuinfo_x86 *c)
 {
        if (cpu_has(c, X86_FEATURE_SMEP)) {
                if (unlikely(disable_smep)) {
index b2699bb..d871b14 100644 (file)
@@ -42,6 +42,7 @@
 #include <linux/genhd.h>
 #include <net/tcp.h>
 #include <linux/lru_cache.h>
+#include <linux/prefetch.h>
 
 #ifdef __CHECKER__
 # define __protected_by(x)       __attribute__((require_context(x,1,999,"rdwr")))
index c9213ea..a4d6cb0 100644 (file)
@@ -34,6 +34,7 @@
 #include <linux/delay.h>
 #include <linux/dma-mapping.h>
 #include <linux/workqueue.h>
+#include <linux/prefetch.h>
 #include <linux/i7300_idle.h>
 #include "dma.h"
 #include "registers.h"
index effd140..f4a51d4 100644 (file)
@@ -34,6 +34,7 @@
 #include <linux/delay.h>
 #include <linux/dma-mapping.h>
 #include <linux/workqueue.h>
+#include <linux/prefetch.h>
 #include <linux/i7300_idle.h>
 #include "dma.h"
 #include "dma_v2.h"
index d0f4990..d845dc4 100644 (file)
@@ -60,6 +60,7 @@
 #include <linux/gfp.h>
 #include <linux/dmaengine.h>
 #include <linux/dma-mapping.h>
+#include <linux/prefetch.h>
 #include "registers.h"
 #include "hw.h"
 #include "dma.h"
index c26c119..2af8cb4 100644 (file)
@@ -416,21 +416,21 @@ void ide_acpi_get_timing(ide_hwif_t *hwif)
 
        out_obj = output.pointer;
        if (out_obj->type != ACPI_TYPE_BUFFER) {
-               kfree(output.pointer);
                DEBPRINT("Run _GTM: error: "
                       "expected object type of ACPI_TYPE_BUFFER, "
                       "got 0x%x\n", out_obj->type);
+               kfree(output.pointer);
                return;
        }
 
        if (!out_obj->buffer.length || !out_obj->buffer.pointer ||
            out_obj->buffer.length != sizeof(struct GTM_buffer)) {
-               kfree(output.pointer);
                printk(KERN_ERR
                        "%s: unexpected _GTM length (0x%x)[should be 0x%zx] or "
                        "addr (0x%p)\n",
                        __func__, out_obj->buffer.length,
                        sizeof(struct GTM_buffer), out_obj->buffer.pointer);
+               kfree(output.pointer);
                return;
        }
 
index 5a702d0..61fdf54 100644 (file)
@@ -73,7 +73,7 @@ static int ide_floppy_callback(ide_drive_t *drive, int dsc)
                drive->failed_pc = NULL;
 
        if (pc->c[0] == GPCMD_READ_10 || pc->c[0] == GPCMD_WRITE_10 ||
-           (rq && rq->cmd_type == REQ_TYPE_BLOCK_PC))
+           rq->cmd_type == REQ_TYPE_BLOCK_PC)
                uptodate = 1; /* FIXME */
        else if (pc->c[0] == GPCMD_REQUEST_SENSE) {
 
index 0e79eff..c3da53e 100644 (file)
@@ -88,7 +88,7 @@ static int __init ide_scan_pcibus(void)
        struct list_head *l, *n;
 
        pre_init = 0;
-       while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)))
+       for_each_pci_dev(dev)
                ide_scan_pcidev(dev);
 
        /*
index ebcf8e4..1db7c43 100644 (file)
@@ -1334,7 +1334,7 @@ out_free_pmif:
 static int
 pmac_ide_pci_suspend(struct pci_dev *pdev, pm_message_t mesg)
 {
-       pmac_ide_hwif_t *pmif = (pmac_ide_hwif_t *)pci_get_drvdata(pdev);
+       pmac_ide_hwif_t *pmif = pci_get_drvdata(pdev);
        int rc = 0;
 
        if (mesg.event != pdev->dev.power.power_state.event
@@ -1350,7 +1350,7 @@ pmac_ide_pci_suspend(struct pci_dev *pdev, pm_message_t mesg)
 static int
 pmac_ide_pci_resume(struct pci_dev *pdev)
 {
-       pmac_ide_hwif_t *pmif = (pmac_ide_hwif_t *)pci_get_drvdata(pdev);
+       pmac_ide_hwif_t *pmif = pci_get_drvdata(pdev);
        int rc = 0;
 
        if (pdev->dev.power.power_state.event != PM_EVENT_ON) {
index dc85d77..0cfc455 100644 (file)
@@ -47,6 +47,7 @@
 #include <linux/init.h>
 #include <linux/dma-mapping.h>
 #include <linux/slab.h>
+#include <linux/prefetch.h>
 
 #include <asm/io.h>
 #include <asm/irq.h>
index 5c93627..70bd738 100644 (file)
@@ -493,11 +493,11 @@ void bitmap_update_sb(struct bitmap *bitmap)
        spin_unlock_irqrestore(&bitmap->lock, flags);
        sb = kmap_atomic(bitmap->sb_page, KM_USER0);
        sb->events = cpu_to_le64(bitmap->mddev->events);
-       if (bitmap->mddev->events < bitmap->events_cleared) {
+       if (bitmap->mddev->events < bitmap->events_cleared)
                /* rocking back to read-only */
                bitmap->events_cleared = bitmap->mddev->events;
-               sb->events_cleared = cpu_to_le64(bitmap->events_cleared);
-       }
+       sb->events_cleared = cpu_to_le64(bitmap->events_cleared);
+       sb->state = cpu_to_le32(bitmap->flags);
        /* Just in case these have been changed via sysfs: */
        sb->daemon_sleep = cpu_to_le32(bitmap->mddev->bitmap_info.daemon_sleep/HZ);
        sb->write_behind = cpu_to_le32(bitmap->mddev->bitmap_info.max_write_behind);
@@ -618,7 +618,7 @@ success:
        if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN)
                bitmap->flags |= BITMAP_HOSTENDIAN;
        bitmap->events_cleared = le64_to_cpu(sb->events_cleared);
-       if (sb->state & cpu_to_le32(BITMAP_STALE))
+       if (bitmap->flags & BITMAP_STALE)
                bitmap->events_cleared = bitmap->mddev->events;
        err = 0;
 out:
@@ -652,9 +652,11 @@ static int bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits,
        switch (op) {
        case MASK_SET:
                sb->state |= cpu_to_le32(bits);
+               bitmap->flags |= bits;
                break;
        case MASK_UNSET:
                sb->state &= cpu_to_le32(~bits);
+               bitmap->flags &= ~bits;
                break;
        default:
                BUG();
index 7d6f7f1..aa640a8 100644 (file)
@@ -3324,7 +3324,7 @@ resync_start_store(mddev_t *mddev, const char *buf, size_t len)
        char *e;
        unsigned long long n = simple_strtoull(buf, &e, 10);
 
-       if (mddev->pers)
+       if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
                return -EBUSY;
        if (cmd_match(buf, "none"))
                n = MaxSector;
@@ -4347,13 +4347,19 @@ static int md_alloc(dev_t dev, char *name)
        disk->fops = &md_fops;
        disk->private_data = mddev;
        disk->queue = mddev->queue;
+       blk_queue_flush(mddev->queue, REQ_FLUSH | REQ_FUA);
        /* Allow extended partitions.  This makes the
         * 'mdp' device redundant, but we can't really
         * remove it now.
         */
        disk->flags |= GENHD_FL_EXT_DEVT;
-       add_disk(disk);
        mddev->gendisk = disk;
+       /* As soon as we call add_disk(), another thread could get
+        * through to md_open, so make sure it doesn't get too far
+        */
+       mutex_lock(&mddev->open_mutex);
+       add_disk(disk);
+
        error = kobject_init_and_add(&mddev->kobj, &md_ktype,
                                     &disk_to_dev(disk)->kobj, "%s", "md");
        if (error) {
@@ -4367,8 +4373,7 @@ static int md_alloc(dev_t dev, char *name)
        if (mddev->kobj.sd &&
            sysfs_create_group(&mddev->kobj, &md_bitmap_group))
                printk(KERN_DEBUG "pointless warning\n");
-
-       blk_queue_flush(mddev->queue, REQ_FLUSH | REQ_FUA);
+       mutex_unlock(&mddev->open_mutex);
  abort:
        mutex_unlock(&disks_mutex);
        if (!error && mddev->kobj.sd) {
@@ -5211,6 +5216,16 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
                } else
                        super_types[mddev->major_version].
                                validate_super(mddev, rdev);
+               if ((info->state & (1<<MD_DISK_SYNC)) &&
+                   (!test_bit(In_sync, &rdev->flags) ||
+                    rdev->raid_disk != info->raid_disk)) {
+                       /* This was a hot-add request, but events doesn't
+                        * match, so reject it.
+                        */
+                       export_rdev(rdev);
+                       return -EINVAL;
+               }
+
                if (test_bit(In_sync, &rdev->flags))
                        rdev->saved_raid_disk = rdev->raid_disk;
                else
index c358909..3535c23 100644 (file)
@@ -146,7 +146,7 @@ static void multipath_status (struct seq_file *seq, mddev_t *mddev)
        int i;
        
        seq_printf (seq, " [%d/%d] [", conf->raid_disks,
-                                                conf->working_disks);
+                   conf->raid_disks - mddev->degraded);
        for (i = 0; i < conf->raid_disks; i++)
                seq_printf (seq, "%s",
                               conf->multipaths[i].rdev && 
@@ -186,35 +186,36 @@ static int multipath_congested(void *data, int bits)
 static void multipath_error (mddev_t *mddev, mdk_rdev_t *rdev)
 {
        multipath_conf_t *conf = mddev->private;
+       char b[BDEVNAME_SIZE];
 
-       if (conf->working_disks <= 1) {
+       if (conf->raid_disks - mddev->degraded <= 1) {
                /*
                 * Uh oh, we can do nothing if this is our last path, but
                 * first check if this is a queued request for a device
                 * which has just failed.
                 */
                printk(KERN_ALERT 
-                       "multipath: only one IO path left and IO error.\n");
+                      "multipath: only one IO path left and IO error.\n");
                /* leave it active... it's all we have */
-       } else {
-               /*
-                * Mark disk as unusable
-                */
-               if (!test_bit(Faulty, &rdev->flags)) {
-                       char b[BDEVNAME_SIZE];
-                       clear_bit(In_sync, &rdev->flags);
-                       set_bit(Faulty, &rdev->flags);
-                       set_bit(MD_CHANGE_DEVS, &mddev->flags);
-                       conf->working_disks--;
-                       mddev->degraded++;
-                       printk(KERN_ALERT "multipath: IO failure on %s,"
-                               " disabling IO path.\n"
-                               "multipath: Operation continuing"
-                               " on %d IO paths.\n",
-                               bdevname (rdev->bdev,b),
-                               conf->working_disks);
-               }
+               return;
+       }
+       /*
+        * Mark disk as unusable
+        */
+       if (test_and_clear_bit(In_sync, &rdev->flags)) {
+               unsigned long flags;
+               spin_lock_irqsave(&conf->device_lock, flags);
+               mddev->degraded++;
+               spin_unlock_irqrestore(&conf->device_lock, flags);
        }
+       set_bit(Faulty, &rdev->flags);
+       set_bit(MD_CHANGE_DEVS, &mddev->flags);
+       printk(KERN_ALERT "multipath: IO failure on %s,"
+              " disabling IO path.\n"
+              "multipath: Operation continuing"
+              " on %d IO paths.\n",
+              bdevname(rdev->bdev, b),
+              conf->raid_disks - mddev->degraded);
 }
 
 static void print_multipath_conf (multipath_conf_t *conf)
@@ -227,7 +228,7 @@ static void print_multipath_conf (multipath_conf_t *conf)
                printk("(conf==NULL)\n");
                return;
        }
-       printk(" --- wd:%d rd:%d\n", conf->working_disks,
+       printk(" --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded,
                         conf->raid_disks);
 
        for (i = 0; i < conf->raid_disks; i++) {
@@ -274,10 +275,11 @@ static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
                                                           PAGE_CACHE_SIZE - 1);
                        }
 
-                       conf->working_disks++;
+                       spin_lock_irq(&conf->device_lock);
                        mddev->degraded--;
                        rdev->raid_disk = path;
                        set_bit(In_sync, &rdev->flags);
+                       spin_unlock_irq(&conf->device_lock);
                        rcu_assign_pointer(p->rdev, rdev);
                        err = 0;
                        md_integrity_add_rdev(rdev, mddev);
@@ -391,6 +393,7 @@ static int multipath_run (mddev_t *mddev)
        int disk_idx;
        struct multipath_info *disk;
        mdk_rdev_t *rdev;
+       int working_disks;
 
        if (md_check_no_bitmap(mddev))
                return -EINVAL;
@@ -424,7 +427,7 @@ static int multipath_run (mddev_t *mddev)
                goto out_free_conf;
        }
 
-       conf->working_disks = 0;
+       working_disks = 0;
        list_for_each_entry(rdev, &mddev->disks, same_set) {
                disk_idx = rdev->raid_disk;
                if (disk_idx < 0 ||
@@ -446,7 +449,7 @@ static int multipath_run (mddev_t *mddev)
                }
 
                if (!test_bit(Faulty, &rdev->flags))
-                       conf->working_disks++;
+                       working_disks++;
        }
 
        conf->raid_disks = mddev->raid_disks;
@@ -454,12 +457,12 @@ static int multipath_run (mddev_t *mddev)
        spin_lock_init(&conf->device_lock);
        INIT_LIST_HEAD(&conf->retry_list);
 
-       if (!conf->working_disks) {
+       if (!working_disks) {
                printk(KERN_ERR "multipath: no operational IO paths for %s\n",
                        mdname(mddev));
                goto out_free_conf;
        }
-       mddev->degraded = conf->raid_disks - conf->working_disks;
+       mddev->degraded = conf->raid_disks - working_disks;
 
        conf->pool = mempool_create_kmalloc_pool(NR_RESERVED_BUFS,
                                                 sizeof(struct multipath_bh));
@@ -481,7 +484,8 @@ static int multipath_run (mddev_t *mddev)
 
        printk(KERN_INFO 
                "multipath: array %s active with %d out of %d IO paths\n",
-               mdname(mddev), conf->working_disks, mddev->raid_disks);
+               mdname(mddev), conf->raid_disks - mddev->degraded,
+              mddev->raid_disks);
        /*
         * Ok, everything is just fine now
         */
index d1c2a8d..3c5a45e 100644 (file)
@@ -9,7 +9,6 @@ struct multipath_private_data {
        mddev_t                 *mddev;
        struct multipath_info   *multipaths;
        int                     raid_disks;
-       int                     working_disks;
        spinlock_t              device_lock;
        struct list_head        retry_list;
 
index 2b7a7ff..5d09609 100644 (file)
@@ -297,23 +297,24 @@ static void raid1_end_read_request(struct bio *bio, int error)
        rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev);
 }
 
-static void r1_bio_write_done(r1bio_t *r1_bio, int vcnt, struct bio_vec *bv,
-                             int behind)
+static void r1_bio_write_done(r1bio_t *r1_bio)
 {
        if (atomic_dec_and_test(&r1_bio->remaining))
        {
                /* it really is the end of this request */
                if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
                        /* free extra copy of the data pages */
-                       int i = vcnt;
+                       int i = r1_bio->behind_page_count;
                        while (i--)
-                               safe_put_page(bv[i].bv_page);
+                               safe_put_page(r1_bio->behind_pages[i]);
+                       kfree(r1_bio->behind_pages);
+                       r1_bio->behind_pages = NULL;
                }
                /* clear the bitmap if all writes complete successfully */
                bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
                                r1_bio->sectors,
                                !test_bit(R1BIO_Degraded, &r1_bio->state),
-                               behind);
+                               test_bit(R1BIO_BehindIO, &r1_bio->state));
                md_write_end(r1_bio->mddev);
                raid_end_bio_io(r1_bio);
        }
@@ -386,7 +387,7 @@ static void raid1_end_write_request(struct bio *bio, int error)
         * Let's see if all mirrored write operations have finished
         * already.
         */
-       r1_bio_write_done(r1_bio, bio->bi_vcnt, bio->bi_io_vec, behind);
+       r1_bio_write_done(r1_bio);
 
        if (to_put)
                bio_put(to_put);
@@ -411,10 +412,10 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
 {
        const sector_t this_sector = r1_bio->sector;
        const int sectors = r1_bio->sectors;
-       int new_disk = -1;
        int start_disk;
+       int best_disk;
        int i;
-       sector_t new_distance, current_distance;
+       sector_t best_dist;
        mdk_rdev_t *rdev;
        int choose_first;
 
@@ -425,6 +426,8 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
         * We take the first readable disk when above the resync window.
         */
  retry:
+       best_disk = -1;
+       best_dist = MaxSector;
        if (conf->mddev->recovery_cp < MaxSector &&
            (this_sector + sectors >= conf->next_resync)) {
                choose_first = 1;
@@ -434,8 +437,8 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
                start_disk = conf->last_used;
        }
 
-       /* make sure the disk is operational */
        for (i = 0 ; i < conf->raid_disks ; i++) {
+               sector_t dist;
                int disk = start_disk + i;
                if (disk >= conf->raid_disks)
                        disk -= conf->raid_disks;
@@ -443,60 +446,43 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
                rdev = rcu_dereference(conf->mirrors[disk].rdev);
                if (r1_bio->bios[disk] == IO_BLOCKED
                    || rdev == NULL
-                   || !test_bit(In_sync, &rdev->flags))
+                   || test_bit(Faulty, &rdev->flags))
                        continue;
-
-               new_disk = disk;
-               if (!test_bit(WriteMostly, &rdev->flags))
-                       break;
-       }
-
-       if (new_disk < 0 || choose_first)
-               goto rb_out;
-
-       /*
-        * Don't change to another disk for sequential reads:
-        */
-       if (conf->next_seq_sect == this_sector)
-               goto rb_out;
-       if (this_sector == conf->mirrors[new_disk].head_position)
-               goto rb_out;
-
-       current_distance = abs(this_sector 
-                              - conf->mirrors[new_disk].head_position);
-
-       /* look for a better disk - i.e. head is closer */
-       start_disk = new_disk;
-       for (i = 1; i < conf->raid_disks; i++) {
-               int disk = start_disk + 1;
-               if (disk >= conf->raid_disks)
-                       disk -= conf->raid_disks;
-
-               rdev = rcu_dereference(conf->mirrors[disk].rdev);
-               if (r1_bio->bios[disk] == IO_BLOCKED
-                   || rdev == NULL
-                   || !test_bit(In_sync, &rdev->flags)
-                   || test_bit(WriteMostly, &rdev->flags))
+               if (!test_bit(In_sync, &rdev->flags) &&
+                   rdev->recovery_offset < this_sector + sectors)
                        continue;
-
-               if (!atomic_read(&rdev->nr_pending)) {
-                       new_disk = disk;
+               if (test_bit(WriteMostly, &rdev->flags)) {
+                       /* Don't balance among write-mostly, just
+                        * use the first as a last resort */
+                       if (best_disk < 0)
+                               best_disk = disk;
+                       continue;
+               }
+               /* This is a reasonable device to use.  It might
+                * even be best.
+                */
+               dist = abs(this_sector - conf->mirrors[disk].head_position);
+               if (choose_first
+                   /* Don't change to another disk for sequential reads */
+                   || conf->next_seq_sect == this_sector
+                   || dist == 0
+                   /* If device is idle, use it */
+                   || atomic_read(&rdev->nr_pending) == 0) {
+                       best_disk = disk;
                        break;
                }
-               new_distance = abs(this_sector - conf->mirrors[disk].head_position);
-               if (new_distance < current_distance) {
-                       current_distance = new_distance;
-                       new_disk = disk;
+               if (dist < best_dist) {
+                       best_dist = dist;
+                       best_disk = disk;
                }
        }
 
- rb_out:
-       if (new_disk >= 0) {
-               rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
+       if (best_disk >= 0) {
+               rdev = rcu_dereference(conf->mirrors[best_disk].rdev);
                if (!rdev)
                        goto retry;
                atomic_inc(&rdev->nr_pending);
-               if (!test_bit(In_sync, &rdev->flags)) {
+               if (test_bit(Faulty, &rdev->flags)) {
                        /* cannot risk returning a device that failed
                         * before we inc'ed nr_pending
                         */
@@ -504,11 +490,11 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
                        goto retry;
                }
                conf->next_seq_sect = this_sector + sectors;
-               conf->last_used = new_disk;
+               conf->last_used = best_disk;
        }
        rcu_read_unlock();
 
-       return new_disk;
+       return best_disk;
 }
 
 static int raid1_congested(void *data, int bits)
@@ -675,37 +661,36 @@ static void unfreeze_array(conf_t *conf)
 
 
 /* duplicate the data pages for behind I/O 
- * We return a list of bio_vec rather than just page pointers
- * as it makes freeing easier
  */
-static struct bio_vec *alloc_behind_pages(struct bio *bio)
+static void alloc_behind_pages(struct bio *bio, r1bio_t *r1_bio)
 {
        int i;
        struct bio_vec *bvec;
-       struct bio_vec *pages = kzalloc(bio->bi_vcnt * sizeof(struct bio_vec),
+       struct page **pages = kzalloc(bio->bi_vcnt * sizeof(struct page*),
                                        GFP_NOIO);
        if (unlikely(!pages))
-               goto do_sync_io;
+               return;
 
        bio_for_each_segment(bvec, bio, i) {
-               pages[i].bv_page = alloc_page(GFP_NOIO);
-               if (unlikely(!pages[i].bv_page))
+               pages[i] = alloc_page(GFP_NOIO);
+               if (unlikely(!pages[i]))
                        goto do_sync_io;
-               memcpy(kmap(pages[i].bv_page) + bvec->bv_offset,
+               memcpy(kmap(pages[i]) + bvec->bv_offset,
                        kmap(bvec->bv_page) + bvec->bv_offset, bvec->bv_len);
-               kunmap(pages[i].bv_page);
+               kunmap(pages[i]);
                kunmap(bvec->bv_page);
        }
-
-       return pages;
+       r1_bio->behind_pages = pages;
+       r1_bio->behind_page_count = bio->bi_vcnt;
+       set_bit(R1BIO_BehindIO, &r1_bio->state);
+       return;
 
 do_sync_io:
-       if (pages)
-               for (i = 0; i < bio->bi_vcnt && pages[i].bv_page; i++)
-                       put_page(pages[i].bv_page);
+       for (i = 0; i < bio->bi_vcnt; i++)
+               if (pages[i])
+                       put_page(pages[i]);
        kfree(pages);
        PRINTK("%dB behind alloc failed, doing sync I/O\n", bio->bi_size);
-       return NULL;
 }
 
 static int make_request(mddev_t *mddev, struct bio * bio)
@@ -717,7 +702,6 @@ static int make_request(mddev_t *mddev, struct bio * bio)
        int i, targets = 0, disks;
        struct bitmap *bitmap;
        unsigned long flags;
-       struct bio_vec *behind_pages = NULL;
        const int rw = bio_data_dir(bio);
        const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
        const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA));
@@ -870,9 +854,8 @@ static int make_request(mddev_t *mddev, struct bio * bio)
        if (bitmap &&
            (atomic_read(&bitmap->behind_writes)
             < mddev->bitmap_info.max_write_behind) &&
-           !waitqueue_active(&bitmap->behind_wait) &&
-           (behind_pages = alloc_behind_pages(bio)) != NULL)
-               set_bit(R1BIO_BehindIO, &r1_bio->state);
+           !waitqueue_active(&bitmap->behind_wait))
+               alloc_behind_pages(bio, r1_bio);
 
        atomic_set(&r1_bio->remaining, 1);
        atomic_set(&r1_bio->behind_remaining, 0);
@@ -893,7 +876,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
                mbio->bi_rw = WRITE | do_flush_fua | do_sync;
                mbio->bi_private = r1_bio;
 
-               if (behind_pages) {
+               if (r1_bio->behind_pages) {
                        struct bio_vec *bvec;
                        int j;
 
@@ -905,7 +888,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
                         * them all
                         */
                        __bio_for_each_segment(bvec, mbio, j, 0)
-                               bvec->bv_page = behind_pages[j].bv_page;
+                               bvec->bv_page = r1_bio->behind_pages[j];
                        if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags))
                                atomic_inc(&r1_bio->behind_remaining);
                }
@@ -915,8 +898,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
                bio_list_add(&conf->pending_bio_list, mbio);
                spin_unlock_irqrestore(&conf->device_lock, flags);
        }
-       r1_bio_write_done(r1_bio, bio->bi_vcnt, behind_pages, behind_pages != NULL);
-       kfree(behind_pages); /* the behind pages are attached to the bios now */
+       r1_bio_write_done(r1_bio);
 
        /* In case raid1d snuck in to freeze_array */
        wake_up(&conf->wait_barrier);
@@ -1196,194 +1178,210 @@ static void end_sync_write(struct bio *bio, int error)
        }
 }
 
-static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
+static int fix_sync_read_error(r1bio_t *r1_bio)
 {
+       /* Try some synchronous reads of other devices to get
+        * good data, much like with normal read errors.  Only
+        * read into the pages we already have so we don't
+        * need to re-issue the read request.
+        * We don't need to freeze the array, because being in an
+        * active sync request, there is no normal IO, and
+        * no overlapping syncs.
+        */
+       mddev_t *mddev = r1_bio->mddev;
        conf_t *conf = mddev->private;
-       int i;
-       int disks = conf->raid_disks;
-       struct bio *bio, *wbio;
-
-       bio = r1_bio->bios[r1_bio->read_disk];
+       struct bio *bio = r1_bio->bios[r1_bio->read_disk];
+       sector_t sect = r1_bio->sector;
+       int sectors = r1_bio->sectors;
+       int idx = 0;
 
+       while(sectors) {
+               int s = sectors;
+               int d = r1_bio->read_disk;
+               int success = 0;
+               mdk_rdev_t *rdev;
+               int start;
 
-       if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
-               /* We have read all readable devices.  If we haven't
-                * got the block, then there is no hope left.
-                * If we have, then we want to do a comparison
-                * and skip the write if everything is the same.
-                * If any blocks failed to read, then we need to
-                * attempt an over-write
-                */
-               int primary;
-               if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) {
-                       for (i=0; i<mddev->raid_disks; i++)
-                               if (r1_bio->bios[i]->bi_end_io == end_sync_read)
-                                       md_error(mddev, conf->mirrors[i].rdev);
+               if (s > (PAGE_SIZE>>9))
+                       s = PAGE_SIZE >> 9;
+               do {
+                       if (r1_bio->bios[d]->bi_end_io == end_sync_read) {
+                               /* No rcu protection needed here devices
+                                * can only be removed when no resync is
+                                * active, and resync is currently active
+                                */
+                               rdev = conf->mirrors[d].rdev;
+                               if (sync_page_io(rdev,
+                                                sect,
+                                                s<<9,
+                                                bio->bi_io_vec[idx].bv_page,
+                                                READ, false)) {
+                                       success = 1;
+                                       break;
+                               }
+                       }
+                       d++;
+                       if (d == conf->raid_disks)
+                               d = 0;
+               } while (!success && d != r1_bio->read_disk);
 
-                       md_done_sync(mddev, r1_bio->sectors, 1);
+               if (!success) {
+                       char b[BDEVNAME_SIZE];
+                       /* Cannot read from anywhere, array is toast */
+                       md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev);
+                       printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O read error"
+                              " for block %llu\n",
+                              mdname(mddev),
+                              bdevname(bio->bi_bdev, b),
+                              (unsigned long long)r1_bio->sector);
+                       md_done_sync(mddev, r1_bio->sectors, 0);
                        put_buf(r1_bio);
-                       return;
+                       return 0;
                }
-               for (primary=0; primary<mddev->raid_disks; primary++)
-                       if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
-                           test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) {
-                               r1_bio->bios[primary]->bi_end_io = NULL;
-                               rdev_dec_pending(conf->mirrors[primary].rdev, mddev);
-                               break;
-                       }
-               r1_bio->read_disk = primary;
-               for (i=0; i<mddev->raid_disks; i++)
-                       if (r1_bio->bios[i]->bi_end_io == end_sync_read) {
-                               int j;
-                               int vcnt = r1_bio->sectors >> (PAGE_SHIFT- 9);
-                               struct bio *pbio = r1_bio->bios[primary];
-                               struct bio *sbio = r1_bio->bios[i];
-
-                               if (test_bit(BIO_UPTODATE, &sbio->bi_flags)) {
-                                       for (j = vcnt; j-- ; ) {
-                                               struct page *p, *s;
-                                               p = pbio->bi_io_vec[j].bv_page;
-                                               s = sbio->bi_io_vec[j].bv_page;
-                                               if (memcmp(page_address(p),
-                                                          page_address(s),
-                                                          PAGE_SIZE))
-                                                       break;
-                                       }
-                               } else
-                                       j = 0;
-                               if (j >= 0)
-                                       mddev->resync_mismatches += r1_bio->sectors;
-                               if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)
-                                             && test_bit(BIO_UPTODATE, &sbio->bi_flags))) {
-                                       sbio->bi_end_io = NULL;
-                                       rdev_dec_pending(conf->mirrors[i].rdev, mddev);
-                               } else {
-                                       /* fixup the bio for reuse */
-                                       int size;
-                                       sbio->bi_vcnt = vcnt;
-                                       sbio->bi_size = r1_bio->sectors << 9;
-                                       sbio->bi_idx = 0;
-                                       sbio->bi_phys_segments = 0;
-                                       sbio->bi_flags &= ~(BIO_POOL_MASK - 1);
-                                       sbio->bi_flags |= 1 << BIO_UPTODATE;
-                                       sbio->bi_next = NULL;
-                                       sbio->bi_sector = r1_bio->sector +
-                                               conf->mirrors[i].rdev->data_offset;
-                                       sbio->bi_bdev = conf->mirrors[i].rdev->bdev;
-                                       size = sbio->bi_size;
-                                       for (j = 0; j < vcnt ; j++) {
-                                               struct bio_vec *bi;
-                                               bi = &sbio->bi_io_vec[j];
-                                               bi->bv_offset = 0;
-                                               if (size > PAGE_SIZE)
-                                                       bi->bv_len = PAGE_SIZE;
-                                               else
-                                                       bi->bv_len = size;
-                                               size -= PAGE_SIZE;
-                                               memcpy(page_address(bi->bv_page),
-                                                      page_address(pbio->bi_io_vec[j].bv_page),
-                                                      PAGE_SIZE);
-                                       }
 
-                               }
-                       }
+               start = d;
+               /* write it back and re-read */
+               while (d != r1_bio->read_disk) {
+                       if (d == 0)
+                               d = conf->raid_disks;
+                       d--;
+                       if (r1_bio->bios[d]->bi_end_io != end_sync_read)
+                               continue;
+                       rdev = conf->mirrors[d].rdev;
+                       if (sync_page_io(rdev,
+                                        sect,
+                                        s<<9,
+                                        bio->bi_io_vec[idx].bv_page,
+                                        WRITE, false) == 0) {
+                               r1_bio->bios[d]->bi_end_io = NULL;
+                               rdev_dec_pending(rdev, mddev);
+                               md_error(mddev, rdev);
+                       } else
+                               atomic_add(s, &rdev->corrected_errors);
+               }
+               d = start;
+               while (d != r1_bio->read_disk) {
+                       if (d == 0)
+                               d = conf->raid_disks;
+                       d--;
+                       if (r1_bio->bios[d]->bi_end_io != end_sync_read)
+                               continue;
+                       rdev = conf->mirrors[d].rdev;
+                       if (sync_page_io(rdev,
+                                        sect,
+                                        s<<9,
+                                        bio->bi_io_vec[idx].bv_page,
+                                        READ, false) == 0)
+                               md_error(mddev, rdev);
+               }
+               sectors -= s;
+               sect += s;
+               idx ++;
        }
-       if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) {
-               /* ouch - failed to read all of that.
-                * Try some synchronous reads of other devices to get
-                * good data, much like with normal read errors.  Only
-                * read into the pages we already have so we don't
-                * need to re-issue the read request.
-                * We don't need to freeze the array, because being in an
-                * active sync request, there is no normal IO, and
-                * no overlapping syncs.
-                */
-               sector_t sect = r1_bio->sector;
-               int sectors = r1_bio->sectors;
-               int idx = 0;
-
-               while(sectors) {
-                       int s = sectors;
-                       int d = r1_bio->read_disk;
-                       int success = 0;
-                       mdk_rdev_t *rdev;
-
-                       if (s > (PAGE_SIZE>>9))
-                               s = PAGE_SIZE >> 9;
-                       do {
-                               if (r1_bio->bios[d]->bi_end_io == end_sync_read) {
-                                       /* No rcu protection needed here devices
-                                        * can only be removed when no resync is
-                                        * active, and resync is currently active
-                                        */
-                                       rdev = conf->mirrors[d].rdev;
-                                       if (sync_page_io(rdev,
-                                                        sect,
-                                                        s<<9,
-                                                        bio->bi_io_vec[idx].bv_page,
-                                                        READ, false)) {
-                                               success = 1;
-                                               break;
-                                       }
-                               }
-                               d++;
-                               if (d == conf->raid_disks)
-                                       d = 0;
-                       } while (!success && d != r1_bio->read_disk);
-
-                       if (success) {
-                               int start = d;
-                               /* write it back and re-read */
-                               set_bit(R1BIO_Uptodate, &r1_bio->state);
-                               while (d != r1_bio->read_disk) {
-                                       if (d == 0)
-                                               d = conf->raid_disks;
-                                       d--;
-                                       if (r1_bio->bios[d]->bi_end_io != end_sync_read)
-                                               continue;
-                                       rdev = conf->mirrors[d].rdev;
-                                       atomic_add(s, &rdev->corrected_errors);
-                                       if (sync_page_io(rdev,
-                                                        sect,
-                                                        s<<9,
-                                                        bio->bi_io_vec[idx].bv_page,
-                                                        WRITE, false) == 0)
-                                               md_error(mddev, rdev);
-                               }
-                               d = start;
-                               while (d != r1_bio->read_disk) {
-                                       if (d == 0)
-                                               d = conf->raid_disks;
-                                       d--;
-                                       if (r1_bio->bios[d]->bi_end_io != end_sync_read)
-                                               continue;
-                                       rdev = conf->mirrors[d].rdev;
-                                       if (sync_page_io(rdev,
-                                                        sect,
-                                                        s<<9,
-                                                        bio->bi_io_vec[idx].bv_page,
-                                                        READ, false) == 0)
-                                               md_error(mddev, rdev);
-                               }
-                       } else {
-                               char b[BDEVNAME_SIZE];
-                               /* Cannot read from anywhere, array is toast */
-                               md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev);
-                               printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O read error"
-                                      " for block %llu\n",
-                                      mdname(mddev),
-                                      bdevname(bio->bi_bdev, b),
-                                      (unsigned long long)r1_bio->sector);
-                               md_done_sync(mddev, r1_bio->sectors, 0);
-                               put_buf(r1_bio);
-                               return;
+       set_bit(R1BIO_Uptodate, &r1_bio->state);
+       set_bit(BIO_UPTODATE, &bio->bi_flags);
+       return 1;
+}
+
+static int process_checks(r1bio_t *r1_bio)
+{
+       /* We have read all readable devices.  If we haven't
+        * got the block, then there is no hope left.
+        * If we have, then we want to do a comparison
+        * and skip the write if everything is the same.
+        * If any blocks failed to read, then we need to
+        * attempt an over-write
+        */
+       mddev_t *mddev = r1_bio->mddev;
+       conf_t *conf = mddev->private;
+       int primary;
+       int i;
+
+       for (primary = 0; primary < conf->raid_disks; primary++)
+               if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
+                   test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) {
+                       r1_bio->bios[primary]->bi_end_io = NULL;
+                       rdev_dec_pending(conf->mirrors[primary].rdev, mddev);
+                       break;
+               }
+       r1_bio->read_disk = primary;
+       for (i = 0; i < conf->raid_disks; i++) {
+               int j;
+               int vcnt = r1_bio->sectors >> (PAGE_SHIFT- 9);
+               struct bio *pbio = r1_bio->bios[primary];
+               struct bio *sbio = r1_bio->bios[i];
+               int size;
+
+               if (r1_bio->bios[i]->bi_end_io != end_sync_read)
+                       continue;
+
+               if (test_bit(BIO_UPTODATE, &sbio->bi_flags)) {
+                       for (j = vcnt; j-- ; ) {
+                               struct page *p, *s;
+                               p = pbio->bi_io_vec[j].bv_page;
+                               s = sbio->bi_io_vec[j].bv_page;
+                               if (memcmp(page_address(p),
+                                          page_address(s),
+                                          PAGE_SIZE))
+                                       break;
                        }
-                       sectors -= s;
-                       sect += s;
-                       idx ++;
+               } else
+                       j = 0;
+               if (j >= 0)
+                       mddev->resync_mismatches += r1_bio->sectors;
+               if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)
+                             && test_bit(BIO_UPTODATE, &sbio->bi_flags))) {
+                       /* No need to write to this device. */
+                       sbio->bi_end_io = NULL;
+                       rdev_dec_pending(conf->mirrors[i].rdev, mddev);
+                       continue;
+               }
+               /* fixup the bio for reuse */
+               sbio->bi_vcnt = vcnt;
+               sbio->bi_size = r1_bio->sectors << 9;
+               sbio->bi_idx = 0;
+               sbio->bi_phys_segments = 0;
+               sbio->bi_flags &= ~(BIO_POOL_MASK - 1);
+               sbio->bi_flags |= 1 << BIO_UPTODATE;
+               sbio->bi_next = NULL;
+               sbio->bi_sector = r1_bio->sector +
+                       conf->mirrors[i].rdev->data_offset;
+               sbio->bi_bdev = conf->mirrors[i].rdev->bdev;
+               size = sbio->bi_size;
+               for (j = 0; j < vcnt ; j++) {
+                       struct bio_vec *bi;
+                       bi = &sbio->bi_io_vec[j];
+                       bi->bv_offset = 0;
+                       if (size > PAGE_SIZE)
+                               bi->bv_len = PAGE_SIZE;
+                       else
+                               bi->bv_len = size;
+                       size -= PAGE_SIZE;
+                       memcpy(page_address(bi->bv_page),
+                              page_address(pbio->bi_io_vec[j].bv_page),
+                              PAGE_SIZE);
                }
        }
+       return 0;
+}
 
+static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
+{
+       conf_t *conf = mddev->private;
+       int i;
+       int disks = conf->raid_disks;
+       struct bio *bio, *wbio;
+
+       bio = r1_bio->bios[r1_bio->read_disk];
+
+       if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
+               /* ouch - failed to read all of that. */
+               if (!fix_sync_read_error(r1_bio))
+                       return;
+
+       if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
+               if (process_checks(r1_bio) < 0)
+                       return;
        /*
         * schedule writes
         */
@@ -2063,7 +2061,7 @@ static int raid1_resize(mddev_t *mddev, sector_t sectors)
        set_capacity(mddev->gendisk, mddev->array_sectors);
        revalidate_disk(mddev->gendisk);
        if (sectors > mddev->dev_sectors &&
-           mddev->recovery_cp == MaxSector) {
+           mddev->recovery_cp > mddev->dev_sectors) {
                mddev->recovery_cp = mddev->dev_sectors;
                set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
        }
index cbfdf1a..5fc4ca1 100644 (file)
@@ -94,7 +94,9 @@ struct r1bio_s {
        int                     read_disk;
 
        struct list_head        retry_list;
-       struct bitmap_update    *bitmap_update;
+       /* Next two are only valid when R1BIO_BehindIO is set */
+       struct page             **behind_pages;
+       int                     behind_page_count;
        /*
         * if the IO is in WRITE direction, then multiple bios are used.
         * We choose the number when they are allocated.
index 8e94626..6e84668 100644 (file)
@@ -271,9 +271,10 @@ static void raid10_end_read_request(struct bio *bio, int error)
                 */
                set_bit(R10BIO_Uptodate, &r10_bio->state);
                raid_end_bio_io(r10_bio);
+               rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev);
        } else {
                /*
-                * oops, read error:
+                * oops, read error - keep the refcount on the rdev
                 */
                char b[BDEVNAME_SIZE];
                if (printk_ratelimit())
@@ -282,8 +283,6 @@ static void raid10_end_read_request(struct bio *bio, int error)
                               bdevname(conf->mirrors[dev].rdev->bdev,b), (unsigned long long)r10_bio->sector);
                reschedule_retry(r10_bio);
        }
-
-       rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev);
 }
 
 static void raid10_end_write_request(struct bio *bio, int error)
@@ -488,13 +487,19 @@ static int raid10_mergeable_bvec(struct request_queue *q,
 static int read_balance(conf_t *conf, r10bio_t *r10_bio)
 {
        const sector_t this_sector = r10_bio->sector;
-       int disk, slot, nslot;
+       int disk, slot;
        const int sectors = r10_bio->sectors;
-       sector_t new_distance, current_distance;
+       sector_t new_distance, best_dist;
        mdk_rdev_t *rdev;
+       int do_balance;
+       int best_slot;
 
        raid10_find_phys(conf, r10_bio);
        rcu_read_lock();
+retry:
+       best_slot = -1;
+       best_dist = MaxSector;
+       do_balance = 1;
        /*
         * Check if we can balance. We can balance on the whole
         * device if no resync is going on (recovery is ok), or below
@@ -502,86 +507,58 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio)
         * above the resync window.
         */
        if (conf->mddev->recovery_cp < MaxSector
-           && (this_sector + sectors >= conf->next_resync)) {
-               /* make sure that disk is operational */
-               slot = 0;
-               disk = r10_bio->devs[slot].devnum;
-
-               while ((rdev = rcu_dereference(conf->mirrors[disk].rdev)) == NULL ||
-                      r10_bio->devs[slot].bio == IO_BLOCKED ||
-                      !test_bit(In_sync, &rdev->flags)) {
-                       slot++;
-                       if (slot == conf->copies) {
-                               slot = 0;
-                               disk = -1;
-                               break;
-                       }
-                       disk = r10_bio->devs[slot].devnum;
-               }
-               goto rb_out;
-       }
-
+           && (this_sector + sectors >= conf->next_resync))
+               do_balance = 0;
 
-       /* make sure the disk is operational */
-       slot = 0;
-       disk = r10_bio->devs[slot].devnum;
-       while ((rdev=rcu_dereference(conf->mirrors[disk].rdev)) == NULL ||
-              r10_bio->devs[slot].bio == IO_BLOCKED ||
-              !test_bit(In_sync, &rdev->flags)) {
-               slot ++;
-               if (slot == conf->copies) {
-                       disk = -1;
-                       goto rb_out;
-               }
+       for (slot = 0; slot < conf->copies ; slot++) {
+               if (r10_bio->devs[slot].bio == IO_BLOCKED)
+                       continue;
                disk = r10_bio->devs[slot].devnum;
-       }
-
-
-       current_distance = abs(r10_bio->devs[slot].addr -
-                              conf->mirrors[disk].head_position);
-
-       /* Find the disk whose head is closest,
-        * or - for far > 1 - find the closest to partition beginning */
-
-       for (nslot = slot; nslot < conf->copies; nslot++) {
-               int ndisk = r10_bio->devs[nslot].devnum;
-
-
-               if ((rdev=rcu_dereference(conf->mirrors[ndisk].rdev)) == NULL ||
-                   r10_bio->devs[nslot].bio == IO_BLOCKED ||
-                   !test_bit(In_sync, &rdev->flags))
+               rdev = rcu_dereference(conf->mirrors[disk].rdev);
+               if (rdev == NULL)
                        continue;
+               if (!test_bit(In_sync, &rdev->flags))
+                       continue;
+
+               if (!do_balance)
+                       break;
 
                /* This optimisation is debatable, and completely destroys
                 * sequential read speed for 'far copies' arrays.  So only
                 * keep it for 'near' arrays, and review those later.
                 */
-               if (conf->near_copies > 1 && !atomic_read(&rdev->nr_pending)) {
-                       disk = ndisk;
-                       slot = nslot;
+               if (conf->near_copies > 1 && !atomic_read(&rdev->nr_pending))
                        break;
-               }
 
                /* for far > 1 always use the lowest address */
                if (conf->far_copies > 1)
-                       new_distance = r10_bio->devs[nslot].addr;
+                       new_distance = r10_bio->devs[slot].addr;
                else
-                       new_distance = abs(r10_bio->devs[nslot].addr -
-                                          conf->mirrors[ndisk].head_position);
-               if (new_distance < current_distance) {
-                       current_distance = new_distance;
-                       disk = ndisk;
-                       slot = nslot;
+                       new_distance = abs(r10_bio->devs[slot].addr -
+                                          conf->mirrors[disk].head_position);
+               if (new_distance < best_dist) {
+                       best_dist = new_distance;
+                       best_slot = slot;
                }
        }
+       if (slot == conf->copies)
+               slot = best_slot;
 
-rb_out:
-       r10_bio->read_slot = slot;
-/*     conf->next_seq_sect = this_sector + sectors;*/
-
-       if (disk >= 0 && (rdev=rcu_dereference(conf->mirrors[disk].rdev))!= NULL)
-               atomic_inc(&conf->mirrors[disk].rdev->nr_pending);
-       else
+       if (slot >= 0) {
+               disk = r10_bio->devs[slot].devnum;
+               rdev = rcu_dereference(conf->mirrors[disk].rdev);
+               if (!rdev)
+                       goto retry;
+               atomic_inc(&rdev->nr_pending);
+               if (test_bit(Faulty, &rdev->flags)) {
+                       /* Cannot risk returning a device that failed
+                        * before we inc'ed nr_pending
+                        */
+                       rdev_dec_pending(rdev, conf->mddev);
+                       goto retry;
+               }
+               r10_bio->read_slot = slot;
+       } else
                disk = -1;
        rcu_read_unlock();
 
@@ -1460,40 +1437,33 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
        int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
        int d = r10_bio->devs[r10_bio->read_slot].devnum;
 
-       rcu_read_lock();
-       rdev = rcu_dereference(conf->mirrors[d].rdev);
-       if (rdev) { /* If rdev is not NULL */
-               char b[BDEVNAME_SIZE];
-               int cur_read_error_count = 0;
+       /* still own a reference to this rdev, so it cannot
+        * have been cleared recently.
+        */
+       rdev = conf->mirrors[d].rdev;
 
-               bdevname(rdev->bdev, b);
+       if (test_bit(Faulty, &rdev->flags))
+               /* drive has already been failed, just ignore any
+                  more fix_read_error() attempts */
+               return;
 
-               if (test_bit(Faulty, &rdev->flags)) {
-                       rcu_read_unlock();
-                       /* drive has already been failed, just ignore any
-                          more fix_read_error() attempts */