Merge ssh://master.kernel.org/pub/scm/linux/kernel/git/tglx/linux-2.6-x86
Linus Torvalds [Wed, 17 Oct 2007 20:13:16 +0000 (13:13 -0700)]
* ssh://master.kernel.org/pub/scm/linux/kernel/git/tglx/linux-2.6-x86: (114 commits)
  x86: delete vsyscall files during make clean
  kbuild: fix typo SRCARCH in find_sources
  x86: fix kernel rebuild due to vsyscall fallout
  .gitignore update for x86 arch
  x86: unify include/asm/debugreg_32/64.h
  x86: unify include/asm/unwind_32/64.h
  x86: unify include/asm/types_32/64.h
  x86: unify include/asm/tlb_32/64.h
  x86: unify include/asm/siginfo_32/64.h
  x86: unify include/asm/bug_32/64.h
  x86: unify include/asm/mman_32/64.h
  x86: unify include/asm/agp_32/64.h
  x86: unify include/asm/kdebug_32/64.h
  x86: unify include/asm/ioctls_32/64.h
  x86: unify include/asm/floppy_32/64.h
  x86: apply missing DMA/OOM prevention to floppy_32.h
  x86: unify include/asm/cache_32/64.h
  x86: unify include/asm/cache_32/64.h
  x86: unify include/asm/dmi_32/64.h
  x86: unify include/asm/delay_32/64.h
  ...

37 files changed:
arch/mips/au1000/Kconfig
arch/mips/au1000/common/irq.c
arch/mips/au1000/common/power.c
arch/mips/au1000/pb1200/irqmap.c
arch/mips/configs/mtx1_defconfig
arch/mips/kernel/head.S
arch/mips/kernel/time.c
arch/mips/kernel/traps.c
arch/mips/sgi-ip22/ip22-time.c
arch/mips/sibyte/bcm1480/time.c
arch/mips/sibyte/sb1250/time.c
arch/x86/kernel/alternative.c
arch/x86/kernel/asm-offsets_32.c
arch/x86/kernel/entry_32.S
arch/x86/kernel/paravirt_32.c
arch/x86/kernel/vmi_32.c
arch/x86/mm/init_32.c
arch/x86/xen/enlighten.c
arch/x86/xen/mmu.c
arch/x86/xen/multicalls.c
arch/x86/xen/multicalls.h
arch/x86/xen/smp.c
arch/x86/xen/time.c
arch/x86/xen/xen-ops.h
block/ll_rw_blk.c
drivers/char/hvc_lguest.c
drivers/lguest/core.c
drivers/lguest/lguest.c
drivers/lguest/lguest_bus.c
drivers/scsi/scsi_lib.c
include/asm-mips/mach-au1x00/au1000.h
include/asm-mips/mach-db1x00/db1200.h
include/asm-mips/mach-pb1x00/pb1200.h
include/asm-x86/paravirt.h
include/asm-x86/pgtable-3level-defs.h
include/xen/interface/vcpu.h
mm/Kconfig

index 29c95d9..a23d415 100644 (file)
@@ -137,6 +137,7 @@ config SOC_AU1200
 config SOC_AU1X00
        bool
        select 64BIT_PHYS_ADDR
+       select IRQ_CPU
        select SYS_HAS_CPU_MIPS32_R1
        select SYS_SUPPORTS_32BIT_KERNEL
        select SYS_SUPPORTS_APM_EMULATION
index c00f308..59e932a 100644 (file)
@@ -1,11 +1,10 @@
 /*
- * BRIEF MODULE DESCRIPTION
- *     Au1000 interrupt routines.
- *
  * Copyright 2001 MontaVista Software Inc.
  * Author: MontaVista Software, Inc.
  *             ppopov@mvista.com or source@mvista.com
  *
+ * Copyright (C) 2007 Ralf Baechle (ralf@linux-mips.org)
+ *
  *  This program is free software; you can redistribute         it and/or modify it
  *  under  the terms of         the GNU General  Public License as published by the
  *  Free Software Foundation;  either version 2 of the License, or (at your
@@ -32,6 +31,7 @@
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 
+#include <asm/irq_cpu.h>
 #include <asm/mipsregs.h>
 #include <asm/mach-au1x00/au1000.h>
 #ifdef CONFIG_MIPS_PB1000
@@ -44,7 +44,7 @@
 #define EXT_INTC1_REQ1 5 /* IP 5 */
 #define MIPS_TIMER_IP  7 /* IP 7 */
 
-void   (*board_init_irq)(void);
+void (*board_init_irq)(void) __initdata = NULL;
 
 static DEFINE_SPINLOCK(irq_lock);
 
@@ -134,12 +134,14 @@ void restore_au1xxx_intctl(void)
 
 inline void local_enable_irq(unsigned int irq_nr)
 {
-       if (irq_nr > AU1000_LAST_INTC0_INT) {
-               au_writel(1 << (irq_nr - 32), IC1_MASKSET);
-               au_writel(1 << (irq_nr - 32), IC1_WAKESET);
+       unsigned int bit = irq_nr - AU1000_INTC0_INT_BASE;
+
+       if (bit >= 32) {
+               au_writel(1 << (bit - 32), IC1_MASKSET);
+               au_writel(1 << (bit - 32), IC1_WAKESET);
        } else {
-               au_writel(1 << irq_nr, IC0_MASKSET);
-               au_writel(1 << irq_nr, IC0_WAKESET);
+               au_writel(1 << bit, IC0_MASKSET);
+               au_writel(1 << bit, IC0_WAKESET);
        }
        au_sync();
 }
@@ -147,12 +149,14 @@ inline void local_enable_irq(unsigned int irq_nr)
 
 inline void local_disable_irq(unsigned int irq_nr)
 {
-       if (irq_nr > AU1000_LAST_INTC0_INT) {
-               au_writel(1 << (irq_nr - 32), IC1_MASKCLR);
-               au_writel(1 << (irq_nr - 32), IC1_WAKECLR);
+       unsigned int bit = irq_nr - AU1000_INTC0_INT_BASE;
+
+       if (bit >= 32) {
+               au_writel(1 << (bit - 32), IC1_MASKCLR);
+               au_writel(1 << (bit - 32), IC1_WAKECLR);
        } else {
-               au_writel(1 << irq_nr, IC0_MASKCLR);
-               au_writel(1 << irq_nr, IC0_WAKECLR);
+               au_writel(1 << bit, IC0_MASKCLR);
+               au_writel(1 << bit, IC0_WAKECLR);
        }
        au_sync();
 }
@@ -160,12 +164,14 @@ inline void local_disable_irq(unsigned int irq_nr)
 
 static inline void mask_and_ack_rise_edge_irq(unsigned int irq_nr)
 {
-       if (irq_nr > AU1000_LAST_INTC0_INT) {
-               au_writel(1 << (irq_nr - 32), IC1_RISINGCLR);
-               au_writel(1 << (irq_nr - 32), IC1_MASKCLR);
+       unsigned int bit = irq_nr - AU1000_INTC0_INT_BASE;
+
+       if (bit >= 32) {
+               au_writel(1 << (bit - 32), IC1_RISINGCLR);
+               au_writel(1 << (bit - 32), IC1_MASKCLR);
        } else {
-               au_writel(1 << irq_nr, IC0_RISINGCLR);
-               au_writel(1 << irq_nr, IC0_MASKCLR);
+               au_writel(1 << bit, IC0_RISINGCLR);
+               au_writel(1 << bit, IC0_MASKCLR);
        }
        au_sync();
 }
@@ -173,12 +179,14 @@ static inline void mask_and_ack_rise_edge_irq(unsigned int irq_nr)
 
 static inline void mask_and_ack_fall_edge_irq(unsigned int irq_nr)
 {
-       if (irq_nr > AU1000_LAST_INTC0_INT) {
-               au_writel(1 << (irq_nr - 32), IC1_FALLINGCLR);
-               au_writel(1 << (irq_nr - 32), IC1_MASKCLR);
+       unsigned int bit = irq_nr - AU1000_INTC0_INT_BASE;
+
+       if (bit >= 32) {
+               au_writel(1 << (bit - 32), IC1_FALLINGCLR);
+               au_writel(1 << (bit - 32), IC1_MASKCLR);
        } else {
-               au_writel(1 << irq_nr, IC0_FALLINGCLR);
-               au_writel(1 << irq_nr, IC0_MASKCLR);
+               au_writel(1 << bit, IC0_FALLINGCLR);
+               au_writel(1 << bit, IC0_MASKCLR);
        }
        au_sync();
 }
@@ -186,17 +194,20 @@ static inline void mask_and_ack_fall_edge_irq(unsigned int irq_nr)
 
 static inline void mask_and_ack_either_edge_irq(unsigned int irq_nr)
 {
-       /* This may assume that we don't get interrupts from
+       unsigned int bit = irq_nr - AU1000_INTC0_INT_BASE;
+
+       /*
+        * This may assume that we don't get interrupts from
         * both edges at once, or if we do, that we don't care.
         */
-       if (irq_nr > AU1000_LAST_INTC0_INT) {
-               au_writel(1 << (irq_nr - 32), IC1_FALLINGCLR);
-               au_writel(1 << (irq_nr - 32), IC1_RISINGCLR);
-               au_writel(1 << (irq_nr - 32), IC1_MASKCLR);
+       if (bit >= 32) {
+               au_writel(1 << (bit - 32), IC1_FALLINGCLR);
+               au_writel(1 << (bit - 32), IC1_RISINGCLR);
+               au_writel(1 << (bit - 32), IC1_MASKCLR);
        } else {
-               au_writel(1 << irq_nr, IC0_FALLINGCLR);
-               au_writel(1 << irq_nr, IC0_RISINGCLR);
-               au_writel(1 << irq_nr, IC0_MASKCLR);
+               au_writel(1 << bit, IC0_FALLINGCLR);
+               au_writel(1 << bit, IC0_RISINGCLR);
+               au_writel(1 << bit, IC0_MASKCLR);
        }
        au_sync();
 }
@@ -213,10 +224,8 @@ static inline void mask_and_ack_level_irq(unsigned int irq_nr)
                au_sync();
        }
 #endif
-       return;
 }
 
-
 static void end_irq(unsigned int irq_nr)
 {
        if (!(irq_desc[irq_nr].status & (IRQ_DISABLED | IRQ_INPROGRESS)))
@@ -341,114 +350,118 @@ void startup_match20_interrupt(irq_handler_t handler)
 }
 #endif
 
-static void setup_local_irq(unsigned int irq_nr, int type, int int_req)
+static void __init setup_local_irq(unsigned int irq_nr, int type, int int_req)
 {
-       if (irq_nr > AU1000_MAX_INTR) return;
+       unsigned int bit = irq_nr - AU1000_INTC0_INT_BASE;
+
+       if (irq_nr > AU1000_MAX_INTR)
+               return;
+
        /* Config2[n], Config1[n], Config0[n] */
-       if (irq_nr > AU1000_LAST_INTC0_INT) {
+       if (bit >= 32) {
                switch (type) {
                case INTC_INT_RISE_EDGE: /* 0:0:1 */
-                       au_writel(1 << (irq_nr - 32), IC1_CFG2CLR);
-                       au_writel(1 << (irq_nr - 32), IC1_CFG1CLR);
-                       au_writel(1 << (irq_nr - 32), IC1_CFG0SET);
+                       au_writel(1 << (bit - 32), IC1_CFG2CLR);
+                       au_writel(1 << (bit - 32), IC1_CFG1CLR);
+                       au_writel(1 << (bit - 32), IC1_CFG0SET);
                        set_irq_chip(irq_nr, &rise_edge_irq_type);
                        break;
                case INTC_INT_FALL_EDGE: /* 0:1:0 */
-                       au_writel(1 << (irq_nr - 32), IC1_CFG2CLR);
-                       au_writel(1 << (irq_nr - 32), IC1_CFG1SET);
-                       au_writel(1 << (irq_nr - 32), IC1_CFG0CLR);
+                       au_writel(1 << (bit - 32), IC1_CFG2CLR);
+                       au_writel(1 << (bit - 32), IC1_CFG1SET);
+                       au_writel(1 << (bit - 32), IC1_CFG0CLR);
                        set_irq_chip(irq_nr, &fall_edge_irq_type);
                        break;
                case INTC_INT_RISE_AND_FALL_EDGE: /* 0:1:1 */
-                       au_writel(1 << (irq_nr - 32), IC1_CFG2CLR);
-                       au_writel(1 << (irq_nr - 32), IC1_CFG1SET);
-                       au_writel(1 << (irq_nr - 32), IC1_CFG0SET);
+                       au_writel(1 << (bit - 32), IC1_CFG2CLR);
+                       au_writel(1 << (bit - 32), IC1_CFG1SET);
+                       au_writel(1 << (bit - 32), IC1_CFG0SET);
                        set_irq_chip(irq_nr, &either_edge_irq_type);
                        break;
                case INTC_INT_HIGH_LEVEL: /* 1:0:1 */
-                       au_writel(1 << (irq_nr - 32), IC1_CFG2SET);
-                       au_writel(1 << (irq_nr - 32), IC1_CFG1CLR);
-                       au_writel(1 << (irq_nr - 32), IC1_CFG0SET);
+                       au_writel(1 << (bit - 32), IC1_CFG2SET);
+                       au_writel(1 << (bit - 32), IC1_CFG1CLR);
+                       au_writel(1 << (bit - 32), IC1_CFG0SET);
                        set_irq_chip(irq_nr, &level_irq_type);
                        break;
                case INTC_INT_LOW_LEVEL: /* 1:1:0 */
-                       au_writel(1 << (irq_nr - 32), IC1_CFG2SET);
-                       au_writel(1 << (irq_nr - 32), IC1_CFG1SET);
-                       au_writel(1 << (irq_nr - 32), IC1_CFG0CLR);
+                       au_writel(1 << (bit - 32), IC1_CFG2SET);
+                       au_writel(1 << (bit - 32), IC1_CFG1SET);
+                       au_writel(1 << (bit - 32), IC1_CFG0CLR);
                        set_irq_chip(irq_nr, &level_irq_type);
                        break;
                case INTC_INT_DISABLED: /* 0:0:0 */
-                       au_writel(1 << (irq_nr - 32), IC1_CFG0CLR);
-                       au_writel(1 << (irq_nr - 32), IC1_CFG1CLR);
-                       au_writel(1 << (irq_nr - 32), IC1_CFG2CLR);
+                       au_writel(1 << (bit - 32), IC1_CFG0CLR);
+                       au_writel(1 << (bit - 32), IC1_CFG1CLR);
+                       au_writel(1 << (bit - 32), IC1_CFG2CLR);
                        break;
                default: /* disable the interrupt */
                        printk(KERN_WARNING "unexpected int type %d (irq %d)\n",
                               type, irq_nr);
-                       au_writel(1 << (irq_nr - 32), IC1_CFG0CLR);
-                       au_writel(1 << (irq_nr - 32), IC1_CFG1CLR);
-                       au_writel(1 << (irq_nr - 32), IC1_CFG2CLR);
+                       au_writel(1 << (bit - 32), IC1_CFG0CLR);
+                       au_writel(1 << (bit - 32), IC1_CFG1CLR);
+                       au_writel(1 << (bit - 32), IC1_CFG2CLR);
                        return;
                }
                if (int_req) /* assign to interrupt request 1 */
-                       au_writel(1 << (irq_nr - 32), IC1_ASSIGNCLR);
+                       au_writel(1 << (bit - 32), IC1_ASSIGNCLR);
                else         /* assign to interrupt request 0 */
-                       au_writel(1 << (irq_nr - 32), IC1_ASSIGNSET);
-               au_writel(1 << (irq_nr - 32), IC1_SRCSET);
-               au_writel(1 << (irq_nr - 32), IC1_MASKCLR);
-               au_writel(1 << (irq_nr - 32), IC1_WAKECLR);
+                       au_writel(1 << (bit - 32), IC1_ASSIGNSET);
+               au_writel(1 << (bit - 32), IC1_SRCSET);
+               au_writel(1 << (bit - 32), IC1_MASKCLR);
+               au_writel(1 << (bit - 32), IC1_WAKECLR);
        } else {
                switch (type) {
                case INTC_INT_RISE_EDGE: /* 0:0:1 */
-                       au_writel(1 << irq_nr, IC0_CFG2CLR);
-                       au_writel(1 << irq_nr, IC0_CFG1CLR);
-                       au_writel(1 << irq_nr, IC0_CFG0SET);
+                       au_writel(1 << bit, IC0_CFG2CLR);
+                       au_writel(1 << bit, IC0_CFG1CLR);
+                       au_writel(1 << bit, IC0_CFG0SET);
                        set_irq_chip(irq_nr, &rise_edge_irq_type);
                        break;
                case INTC_INT_FALL_EDGE: /* 0:1:0 */
-                       au_writel(1 << irq_nr, IC0_CFG2CLR);
-                       au_writel(1 << irq_nr, IC0_CFG1SET);
-                       au_writel(1 << irq_nr, IC0_CFG0CLR);
+                       au_writel(1 << bit, IC0_CFG2CLR);
+                       au_writel(1 << bit, IC0_CFG1SET);
+                       au_writel(1 << bit, IC0_CFG0CLR);
                        set_irq_chip(irq_nr, &fall_edge_irq_type);
                        break;
                case INTC_INT_RISE_AND_FALL_EDGE: /* 0:1:1 */
-                       au_writel(1 << irq_nr, IC0_CFG2CLR);
-                       au_writel(1 << irq_nr, IC0_CFG1SET);
-                       au_writel(1 << irq_nr, IC0_CFG0SET);
+                       au_writel(1 << bit, IC0_CFG2CLR);
+                       au_writel(1 << bit, IC0_CFG1SET);
+                       au_writel(1 << bit, IC0_CFG0SET);
                        set_irq_chip(irq_nr, &either_edge_irq_type);
                        break;
                case INTC_INT_HIGH_LEVEL: /* 1:0:1 */
-                       au_writel(1 << irq_nr, IC0_CFG2SET);
-                       au_writel(1 << irq_nr, IC0_CFG1CLR);
-                       au_writel(1 << irq_nr, IC0_CFG0SET);
+                       au_writel(1 << bit, IC0_CFG2SET);
+                       au_writel(1 << bit, IC0_CFG1CLR);
+                       au_writel(1 << bit, IC0_CFG0SET);
                        set_irq_chip(irq_nr, &level_irq_type);
                        break;
                case INTC_INT_LOW_LEVEL: /* 1:1:0 */
-                       au_writel(1 << irq_nr, IC0_CFG2SET);
-                       au_writel(1 << irq_nr, IC0_CFG1SET);
-                       au_writel(1 << irq_nr, IC0_CFG0CLR);
+                       au_writel(1 << bit, IC0_CFG2SET);
+                       au_writel(1 << bit, IC0_CFG1SET);
+                       au_writel(1 << bit, IC0_CFG0CLR);
                        set_irq_chip(irq_nr, &level_irq_type);
                        break;
                case INTC_INT_DISABLED: /* 0:0:0 */
-                       au_writel(1 << irq_nr, IC0_CFG0CLR);
-                       au_writel(1 << irq_nr, IC0_CFG1CLR);
-                       au_writel(1 << irq_nr, IC0_CFG2CLR);
+                       au_writel(1 << bit, IC0_CFG0CLR);
+                       au_writel(1 << bit, IC0_CFG1CLR);
+                       au_writel(1 << bit, IC0_CFG2CLR);
                        break;
                default: /* disable the interrupt */
                        printk(KERN_WARNING "unexpected int type %d (irq %d)\n",
                               type, irq_nr);
-                       au_writel(1 << irq_nr, IC0_CFG0CLR);
-                       au_writel(1 << irq_nr, IC0_CFG1CLR);
-                       au_writel(1 << irq_nr, IC0_CFG2CLR);
+                       au_writel(1 << bit, IC0_CFG0CLR);
+                       au_writel(1 << bit, IC0_CFG1CLR);
+                       au_writel(1 << bit, IC0_CFG2CLR);
                        return;
                }
                if (int_req) /* assign to interrupt request 1 */
-                       au_writel(1 << irq_nr, IC0_ASSIGNCLR);
+                       au_writel(1 << bit, IC0_ASSIGNCLR);
                else         /* assign to interrupt request 0 */
-                       au_writel(1 << irq_nr, IC0_ASSIGNSET);
-               au_writel(1 << irq_nr, IC0_SRCSET);
-               au_writel(1 << irq_nr, IC0_MASKCLR);
-               au_writel(1 << irq_nr, IC0_WAKECLR);
+                       au_writel(1 << bit, IC0_ASSIGNSET);
+               au_writel(1 << bit, IC0_SRCSET);
+               au_writel(1 << bit, IC0_MASKCLR);
+               au_writel(1 << bit, IC0_WAKECLR);
        }
        au_sync();
 }
@@ -461,8 +474,8 @@ static void setup_local_irq(unsigned int irq_nr, int type, int int_req)
 
 static void intc0_req0_irqdispatch(void)
 {
-       int irq = 0;
        static unsigned long intc0_req0;
+       unsigned int bit;
 
        intc0_req0 |= au_readl(IC0_REQ0INT);
 
@@ -481,25 +494,25 @@ static void intc0_req0_irqdispatch(void)
                return;
        }
 #endif
-       irq = ffs(intc0_req0);
-       intc0_req0 &= ~(1 << irq);
-       do_IRQ(irq);
+       bit = ffs(intc0_req0);
+       intc0_req0 &= ~(1 << bit);
+       do_IRQ(MIPS_CPU_IRQ_BASE + bit);
 }
 
 
 static void intc0_req1_irqdispatch(void)
 {
-       int irq = 0;
        static unsigned long intc0_req1;
+       unsigned int bit;
 
        intc0_req1 |= au_readl(IC0_REQ1INT);
 
        if (!intc0_req1)
                return;
 
-       irq = ffs(intc0_req1);
-       intc0_req1 &= ~(1 << irq);
-       do_IRQ(irq);
+       bit = ffs(intc0_req1);
+       intc0_req1 &= ~(1 << bit);
+       do_IRQ(bit);
 }
 
 
@@ -509,43 +522,41 @@ static void intc0_req1_irqdispatch(void)
  */
 static void intc1_req0_irqdispatch(void)
 {
-       int irq = 0;
        static unsigned long intc1_req0;
+       unsigned int bit;
 
        intc1_req0 |= au_readl(IC1_REQ0INT);
 
        if (!intc1_req0)
                return;
 
-       irq = ffs(intc1_req0);
-       intc1_req0 &= ~(1 << irq);
-       irq += 32;
-       do_IRQ(irq);
+       bit = ffs(intc1_req0);
+       intc1_req0 &= ~(1 << bit);
+       do_IRQ(MIPS_CPU_IRQ_BASE + 32 + bit);
 }
 
 
 static void intc1_req1_irqdispatch(void)
 {
-       int irq = 0;
        static unsigned long intc1_req1;
+       unsigned int bit;
 
        intc1_req1 |= au_readl(IC1_REQ1INT);
 
        if (!intc1_req1)
                return;
 
-       irq = ffs(intc1_req1);
-       intc1_req1 &= ~(1 << irq);
-       irq += 32;
-       do_IRQ(irq);
+       bit = ffs(intc1_req1);
+       intc1_req1 &= ~(1 << bit);
+       do_IRQ(MIPS_CPU_IRQ_BASE + 32 + bit);
 }
 
 asmlinkage void plat_irq_dispatch(void)
 {
-       unsigned int pending = read_c0_status() & read_c0_cause() & ST0_IM;
+       unsigned int pending = read_c0_status() & read_c0_cause();
 
        if (pending & CAUSEF_IP7)
-               do_IRQ(63);
+               do_IRQ(MIPS_CPU_IRQ_BASE + 7);
        else if (pending & CAUSEF_IP2)
                intc0_req0_irqdispatch();
        else if (pending & CAUSEF_IP3)
@@ -561,17 +572,15 @@ asmlinkage void plat_irq_dispatch(void)
 void __init arch_init_irq(void)
 {
        int i;
-       unsigned long cp0_status;
        struct au1xxx_irqmap *imp;
        extern struct au1xxx_irqmap au1xxx_irq_map[];
        extern struct au1xxx_irqmap au1xxx_ic0_map[];
        extern int au1xxx_nr_irqs;
        extern int au1xxx_ic0_nr_irqs;
 
-       cp0_status = read_c0_status();
-
-       /* Initialize interrupt controllers to a safe state.
-       */
+       /*
+        * Initialize interrupt controllers to a safe state.
+        */
        au_writel(0xffffffff, IC0_CFG0CLR);
        au_writel(0xffffffff, IC0_CFG1CLR);
        au_writel(0xffffffff, IC0_CFG2CLR);
@@ -594,16 +603,20 @@ void __init arch_init_irq(void)
        au_writel(0xffffffff, IC1_RISINGCLR);
        au_writel(0x00000000, IC1_TESTBIT);
 
-       /* Initialize IC0, which is fixed per processor.
-       */
+       mips_cpu_irq_init();
+
+       /*
+        * Initialize IC0, which is fixed per processor.
+        */
        imp = au1xxx_ic0_map;
        for (i = 0; i < au1xxx_ic0_nr_irqs; i++) {
                setup_local_irq(imp->im_irq, imp->im_type, imp->im_request);
                imp++;
        }
 
-       /* Now set up the irq mapping for the board.
-       */
+       /*
+        * Now set up the irq mapping for the board.
+        */
        imp = au1xxx_irq_map;
        for (i = 0; i < au1xxx_nr_irqs; i++) {
                setup_local_irq(imp->im_irq, imp->im_type, imp->im_request);
@@ -615,5 +628,5 @@ void __init arch_init_irq(void)
        /* Board specific IRQ initialization.
        */
        if (board_init_irq)
-               (*board_init_irq)();
+               board_init_irq();
 }
index 6f57f72..54047d6 100644 (file)
@@ -403,9 +403,9 @@ static int pm_do_freq(ctl_table * ctl, int write, struct file *file,
        }
 
 
-       /* We don't want _any_ interrupts other than
-        * match20. Otherwise our au1000_calibrate_delay()
-        * calculation will be off, potentially a lot.
+       /*
+        * We don't want _any_ interrupts other than match20. Otherwise our
+        * au1000_calibrate_delay() calculation will be off, potentially a lot.
         */
        intc0_mask = save_local_and_disable(0);
        intc1_mask = save_local_and_disable(1);
@@ -414,6 +414,7 @@ static int pm_do_freq(ctl_table * ctl, int write, struct file *file,
        au1000_calibrate_delay();
        restore_local_and_enable(0, intc0_mask);
        restore_local_and_enable(1, intc1_mask);
+
        return retval;
 }
 
index 3bee274..5f48b06 100644 (file)
@@ -74,7 +74,7 @@ irqreturn_t pb1200_cascade_handler( int irq, void *dev_id)
        bcsr->int_status = bisr;
        for( ; bisr; bisr &= (bisr-1) )
        {
-               extirq_nr = PB1200_INT_BEGIN + au_ffs(bisr);
+               extirq_nr = PB1200_INT_BEGIN + ffs(bisr);
                /* Ack and dispatch IRQ */
                do_IRQ(extirq_nr);
        }
index 0280ef3..b536d7c 100644 (file)
@@ -3021,7 +3021,7 @@ CONFIG_MAGIC_SYSRQ=y
 # CONFIG_DEBUG_FS is not set
 # CONFIG_HEADERS_CHECK is not set
 # CONFIG_DEBUG_KERNEL is not set
-# CONFIG_CROSSCOMPILE is not set
+CONFIG_CROSSCOMPILE=y
 CONFIG_CMDLINE=""
 CONFIG_SYS_SUPPORTS_KGDB=y
 
index e46782b..bf164a5 100644 (file)
 
 EXPORT(_stext)
 
-#ifndef CONFIG_BOOT_RAW
+#ifdef CONFIG_BOOT_RAW
        /*
         * Give us a fighting chance of running if execution begins at the
         * kernel load address.  This is needed because this platform does
@@ -149,6 +149,8 @@ EXPORT(_stext)
        __INIT
 #endif
 
+       __INIT_REFOK
+
 NESTED(kernel_entry, 16, sp)                   # kernel entry point
 
        kernel_entry_setup                      # cpu specific setup
index 05b3651..e4b5e64 100644 (file)
@@ -391,6 +391,50 @@ static void mips_event_handler(struct clock_event_device *dev)
 {
 }
 
+/*
+ * FIXME: This doesn't hold for the relocated E9000 compare interrupt.
+ */
+static int c0_compare_int_pending(void)
+{
+       return (read_c0_cause() >> cp0_compare_irq) & 0x100;
+}
+
+static int c0_compare_int_usable(void)
+{
+       const unsigned int delta = 0x300000;
+       unsigned int cnt;
+
+       /*
+        * IP7 already pending?  Try to clear it by acking the timer.
+        */
+       if (c0_compare_int_pending()) {
+               write_c0_compare(read_c0_compare());
+               irq_disable_hazard();
+               if (c0_compare_int_pending())
+                       return 0;
+       }
+
+       cnt = read_c0_count();
+       cnt += delta;
+       write_c0_compare(cnt);
+
+       while ((long)(read_c0_count() - cnt) <= 0)
+               ;       /* Wait for expiry  */
+
+       if (!c0_compare_int_pending())
+               return 0;
+
+       write_c0_compare(read_c0_compare());
+       irq_disable_hazard();
+       if (c0_compare_int_pending())
+               return 0;
+
+       /*
+        * Feels like a real count / compare timer.
+        */
+       return 1;
+}
+
 void __cpuinit mips_clockevent_init(void)
 {
        uint64_t mips_freq = mips_hpt_frequency;
@@ -412,6 +456,9 @@ void __cpuinit mips_clockevent_init(void)
                return;
 #endif
 
+       if (!c0_compare_int_usable())
+               return;
+
        cd = &per_cpu(mips_clockevent_device, cpu);
 
        cd->name                = "MIPS";
index 9c0c478..bbf01b8 100644 (file)
@@ -9,9 +9,10 @@
  * Copyright (C) 1999 Silicon Graphics, Inc.
  * Kevin D. Kissell, kevink@mips.com and Carsten Langgaard, carstenl@mips.com
  * Copyright (C) 2000, 01 MIPS Technologies, Inc.
- * Copyright (C) 2002, 2003, 2004, 2005  Maciej W. Rozycki
+ * Copyright (C) 2002, 2003, 2004, 2005, 2007  Maciej W. Rozycki
  */
 #include <linux/bug.h>
+#include <linux/compiler.h>
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/module.h>
@@ -410,7 +411,7 @@ asmlinkage void do_be(struct pt_regs *regs)
 }
 
 /*
- * ll/sc emulation
+ * ll/sc, rdhwr, sync emulation
  */
 
 #define OPCODE 0xfc000000
@@ -419,9 +420,11 @@ asmlinkage void do_be(struct pt_regs *regs)
 #define OFFSET 0x0000ffff
 #define LL     0xc0000000
 #define SC     0xe0000000
+#define SPEC0  0x00000000
 #define SPEC3  0x7c000000
 #define RD     0x0000f800
 #define FUNC   0x0000003f
+#define SYNC   0x0000000f
 #define RDHWR  0x0000003b
 
 /*
@@ -432,11 +435,10 @@ unsigned long ll_bit;
 
 static struct task_struct *ll_task = NULL;
 
-static inline void simulate_ll(struct pt_regs *regs, unsigned int opcode)
+static inline int simulate_ll(struct pt_regs *regs, unsigned int opcode)
 {
        unsigned long value, __user *vaddr;
        long offset;
-       int signal = 0;
 
        /*
         * analyse the ll instruction that just caused a ri exception
@@ -451,14 +453,10 @@ static inline void simulate_ll(struct pt_regs *regs, unsigned int opcode)
        vaddr = (unsigned long __user *)
                ((unsigned long)(regs->regs[(opcode & BASE) >> 21]) + offset);
 
-       if ((unsigned long)vaddr & 3) {
-               signal = SIGBUS;
-               goto sig;
-       }
-       if (get_user(value, vaddr)) {
-               signal = SIGSEGV;
-               goto sig;
-       }
+       if ((unsigned long)vaddr & 3)
+               return SIGBUS;
+       if (get_user(value, vaddr))
+               return SIGSEGV;
 
        preempt_disable();
 
@@ -471,22 +469,16 @@ static inline void simulate_ll(struct pt_regs *regs, unsigned int opcode)
 
        preempt_enable();
 
-       compute_return_epc(regs);
-
        regs->regs[(opcode & RT) >> 16] = value;
 
-       return;
-
-sig:
-       force_sig(signal, current);
+       return 0;
 }
 
-static inline void simulate_sc(struct pt_regs *regs, unsigned int opcode)
+static inline int simulate_sc(struct pt_regs *regs, unsigned int opcode)
 {
        unsigned long __user *vaddr;
        unsigned long reg;
        long offset;
-       int signal = 0;
 
        /*
         * analyse the sc instruction that just caused a ri exception
@@ -502,34 +494,25 @@ static inline void simulate_sc(struct pt_regs *regs, unsigned int opcode)
                ((unsigned long)(regs->regs[(opcode & BASE) >> 21]) + offset);
        reg = (opcode & RT) >> 16;
 
-       if ((unsigned long)vaddr & 3) {
-               signal = SIGBUS;
-               goto sig;
-       }
+       if ((unsigned long)vaddr & 3)
+               return SIGBUS;
 
        preempt_disable();
 
        if (ll_bit == 0 || ll_task != current) {
-               compute_return_epc(regs);
                regs->regs[reg] = 0;
                preempt_enable();
-               return;
+               return 0;
        }
 
        preempt_enable();
 
-       if (put_user(regs->regs[reg], vaddr)) {
-               signal = SIGSEGV;
-               goto sig;
-       }
+       if (put_user(regs->regs[reg], vaddr))
+               return SIGSEGV;
 
-       compute_return_epc(regs);
        regs->regs[reg] = 1;
 
-       return;
-
-sig:
-       force_sig(signal, current);
+       return 0;
 }
 
 /*
@@ -539,27 +522,14 @@ sig:
  * few processors such as NEC's VR4100 throw reserved instruction exceptions
  * instead, so we're doing the emulation thing in both exception handlers.
  */
-static inline int simulate_llsc(struct pt_regs *regs)
+static int simulate_llsc(struct pt_regs *regs, unsigned int opcode)
 {
-       unsigned int opcode;
-
-       if (get_user(opcode, (unsigned int __user *) exception_epc(regs)))
-               goto out_sigsegv;
-
-       if ((opcode & OPCODE) == LL) {
-               simulate_ll(regs, opcode);
-               return 0;
-       }
-       if ((opcode & OPCODE) == SC) {
-               simulate_sc(regs, opcode);
-               return 0;
-       }
-
-       return -EFAULT;                 /* Strange things going on ... */
+       if ((opcode & OPCODE) == LL)
+               return simulate_ll(regs, opcode);
+       if ((opcode & OPCODE) == SC)
+               return simulate_sc(regs, opcode);
 
-out_sigsegv:
-       force_sig(SIGSEGV, current);
-       return -EFAULT;
+       return -1;                      /* Must be something else ... */
 }
 
 /*
@@ -567,16 +537,9 @@ out_sigsegv:
  * registers not implemented in hardware.  The only current use of this
  * is the thread area pointer.
  */
-static inline int simulate_rdhwr(struct pt_regs *regs)
+static int simulate_rdhwr(struct pt_regs *regs, unsigned int opcode)
 {
        struct thread_info *ti = task_thread_info(current);
-       unsigned int opcode;
-
-       if (get_user(opcode, (unsigned int __user *) exception_epc(regs)))
-               goto out_sigsegv;
-
-       if (unlikely(compute_return_epc(regs)))
-               return -EFAULT;
 
        if ((opcode & OPCODE) == SPEC3 && (opcode & FUNC) == RDHWR) {
                int rd = (opcode & RD) >> 11;
@@ -586,16 +549,20 @@ static inline int simulate_rdhwr(struct pt_regs *regs)
                                regs->regs[rt] = ti->tp_value;
                                return 0;
                        default:
-                               return -EFAULT;
+                               return -1;
                }
        }
 
        /* Not ours.  */
-       return -EFAULT;
+       return -1;
+}
 
-out_sigsegv:
-       force_sig(SIGSEGV, current);
-       return -EFAULT;
+static int simulate_sync(struct pt_regs *regs, unsigned int opcode)
+{
+       if ((opcode & OPCODE) == SPEC0 && (opcode & FUNC) == SYNC)
+               return 0;
+
+       return -1;                      /* Must be something else ... */
 }
 
 asmlinkage void do_ov(struct pt_regs *regs)
@@ -767,16 +734,35 @@ out_sigsegv:
 
 asmlinkage void do_ri(struct pt_regs *regs)
 {
-       die_if_kernel("Reserved instruction in kernel code", regs);
+       unsigned int __user *epc = (unsigned int __user *)exception_epc(regs);
+       unsigned long old_epc = regs->cp0_epc;
+       unsigned int opcode = 0;
+       int status = -1;
 
-       if (!cpu_has_llsc)
-               if (!simulate_llsc(regs))
-                       return;
+       die_if_kernel("Reserved instruction in kernel code", regs);
 
-       if (!simulate_rdhwr(regs))
+       if (unlikely(compute_return_epc(regs) < 0))
                return;
 
-       force_sig(SIGILL, current);
+       if (unlikely(get_user(opcode, epc) < 0))
+               status = SIGSEGV;
+
+       if (!cpu_has_llsc && status < 0)
+               status = simulate_llsc(regs, opcode);
+
+       if (status < 0)
+               status = simulate_rdhwr(regs, opcode);
+
+       if (status < 0)
+               status = simulate_sync(regs, opcode);
+
+       if (status < 0)
+               status = SIGILL;
+
+       if (unlikely(status > 0)) {
+               regs->cp0_epc = old_epc;                /* Undo skip-over.  */
+               force_sig(status, current);
+       }
 }
 
 /*
@@ -808,7 +794,11 @@ static void mt_ase_fp_affinity(void)
 
 asmlinkage void do_cpu(struct pt_regs *regs)
 {
+       unsigned int __user *epc;
+       unsigned long old_epc;
+       unsigned int opcode;
        unsigned int cpid;
+       int status;
 
        die_if_kernel("do_cpu invoked from kernel context!", regs);
 
@@ -816,14 +806,32 @@ asmlinkage void do_cpu(struct pt_regs *regs)
 
        switch (cpid) {
        case 0:
-               if (!cpu_has_llsc)
-                       if (!simulate_llsc(regs))
-                               return;
+               epc = (unsigned int __user *)exception_epc(regs);
+               old_epc = regs->cp0_epc;
+               opcode = 0;
+               status = -1;
 
-               if (!simulate_rdhwr(regs))
+               if (unlikely(compute_return_epc(regs) < 0))
                        return;
 
-               break;
+               if (unlikely(get_user(opcode, epc) < 0))
+                       status = SIGSEGV;
+
+               if (!cpu_has_llsc && status < 0)
+                       status = simulate_llsc(regs, opcode);
+
+               if (status < 0)
+                       status = simulate_rdhwr(regs, opcode);
+
+               if (status < 0)
+                       status = SIGILL;
+
+               if (unlikely(status > 0)) {
+                       regs->cp0_epc = old_epc;        /* Undo skip-over.  */
+                       force_sig(status, current);
+               }
+
+               return;
 
        case 1:
                if (used_math())        /* Using the FPU again.  */
index 9b9bffd..10e5054 100644 (file)
@@ -192,12 +192,3 @@ void indy_8254timer_irq(void)
        ArcEnterInteractiveMode();
        irq_exit();
 }
-
-void __init plat_timer_setup(struct irqaction *irq)
-{
-       /* over-write the handler, we use our own way */
-       irq->handler = no_action;
-
-       /* setup irqaction */
-       setup_irq(SGI_TIMER_IRQ, irq);
-}
index 40d7126..5b4bfbb 100644 (file)
@@ -84,7 +84,7 @@ static void sibyte_set_mode(enum clock_event_mode mode,
        void __iomem *timer_cfg, *timer_init;
 
        timer_cfg = IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_CFG));
-       timer_init = IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_CFG));
+       timer_init = IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_INIT));
 
        switch (mode) {
        case CLOCK_EVT_MODE_PERIODIC:
index 38199ad..fe11fed 100644 (file)
@@ -83,7 +83,7 @@ static void sibyte_set_mode(enum clock_event_mode mode,
        void __iomem *timer_cfg, *timer_init;
 
        timer_cfg = IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_CFG));
-       timer_init = IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_CFG));
+       timer_init = IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_INIT));
 
        switch(mode) {
        case CLOCK_EVT_MODE_PERIODIC:
@@ -111,7 +111,7 @@ sibyte_next_event(unsigned long delta, struct clock_event_device *evt)
        void __iomem *timer_cfg, *timer_init;
 
        timer_cfg = IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_CFG));
-       timer_init = IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_CFG));
+       timer_init = IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_INIT));
 
        __raw_writeq(0, timer_cfg);
        __raw_writeq(delta, timer_init);
@@ -155,7 +155,7 @@ static void sibyte_set_mode(enum clock_event_mode mode,
        void __iomem *timer_cfg, *timer_init;
 
        timer_cfg = IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_CFG));
-       timer_init = IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_CFG));
+       timer_init = IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_INIT));
 
        switch (mode) {
        case CLOCK_EVT_MODE_PERIODIC:
@@ -183,7 +183,7 @@ sibyte_next_event(unsigned long delta, struct clock_event_device *evt)
        void __iomem *timer_cfg, *timer_init;
 
        timer_cfg = IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_CFG));
-       timer_init = IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_CFG));
+       timer_init = IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_INIT));
 
        __raw_writeq(0, timer_cfg);
        __raw_writeq(delta, timer_init);
index a3ae8e6..3bd2688 100644 (file)
@@ -390,8 +390,8 @@ void apply_paravirt(struct paravirt_patch_site *start,
                BUG_ON(p->len > MAX_PATCH_LEN);
                /* prep the buffer with the original instructions */
                memcpy(insnbuf, p->instr, p->len);
-               used = paravirt_ops.patch(p->instrtype, p->clobbers, insnbuf,
-                                         (unsigned long)p->instr, p->len);
+               used = pv_init_ops.patch(p->instrtype, p->clobbers, insnbuf,
+                                        (unsigned long)p->instr, p->len);
 
                BUG_ON(used > p->len);
 
index 8029742..f1b7cdd 100644 (file)
@@ -116,12 +116,14 @@ void foo(void)
 
 #ifdef CONFIG_PARAVIRT
        BLANK();
-       OFFSET(PARAVIRT_enabled, paravirt_ops, paravirt_enabled);
-       OFFSET(PARAVIRT_irq_disable, paravirt_ops, irq_disable);
-       OFFSET(PARAVIRT_irq_enable, paravirt_ops, irq_enable);
-       OFFSET(PARAVIRT_irq_enable_sysexit, paravirt_ops, irq_enable_sysexit);
-       OFFSET(PARAVIRT_iret, paravirt_ops, iret);
-       OFFSET(PARAVIRT_read_cr0, paravirt_ops, read_cr0);
+       OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
+       OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops);
+       OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops);
+       OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
+       OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
+       OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
+       OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
+       OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
 #endif
 
 #ifdef CONFIG_XEN
index 8099fea..dc7f938 100644 (file)
@@ -437,7 +437,7 @@ ldt_ss:
         * is still available to implement the setting of the high
         * 16-bits in the INTERRUPT_RETURN paravirt-op.
         */
-       cmpl $0, paravirt_ops+PARAVIRT_enabled
+       cmpl $0, pv_info+PARAVIRT_enabled
        jne restore_nocheck
 #endif
 
index 739cfb2..6a80d67 100644 (file)
@@ -42,32 +42,33 @@ void _paravirt_nop(void)
 static void __init default_banner(void)
 {
        printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
-              paravirt_ops.name);
+              pv_info.name);
 }
 
 char *memory_setup(void)
 {
-       return paravirt_ops.memory_setup();
+       return pv_init_ops.memory_setup();
 }
 
 /* Simple instruction patching code. */
-#define DEF_NATIVE(name, code)                                 \
-       extern const char start_##name[], end_##name[];         \
-       asm("start_" #name ": " code "; end_" #name ":")
-
-DEF_NATIVE(irq_disable, "cli");
-DEF_NATIVE(irq_enable, "sti");
-DEF_NATIVE(restore_fl, "push %eax; popf");
-DEF_NATIVE(save_fl, "pushf; pop %eax");
-DEF_NATIVE(iret, "iret");
-DEF_NATIVE(irq_enable_sysexit, "sti; sysexit");
-DEF_NATIVE(read_cr2, "mov %cr2, %eax");
-DEF_NATIVE(write_cr3, "mov %eax, %cr3");
-DEF_NATIVE(read_cr3, "mov %cr3, %eax");
-DEF_NATIVE(clts, "clts");
-DEF_NATIVE(read_tsc, "rdtsc");
-
-DEF_NATIVE(ud2a, "ud2a");
+#define DEF_NATIVE(ops, name, code)                                    \
+       extern const char start_##ops##_##name[], end_##ops##_##name[]; \
+       asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":")
+
+DEF_NATIVE(pv_irq_ops, irq_disable, "cli");
+DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
+DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf");
+DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax");
+DEF_NATIVE(pv_cpu_ops, iret, "iret");
+DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "sti; sysexit");
+DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax");
+DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3");
+DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
+DEF_NATIVE(pv_cpu_ops, clts, "clts");
+DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc");
+
+/* Undefined instruction for dealing with missing ops pointers. */
+static const unsigned char ud2a[] = { 0x0f, 0x0b };
 
 static unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
                             unsigned long addr, unsigned len)
@@ -76,37 +77,29 @@ static unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
        unsigned ret;
 
        switch(type) {
-#define SITE(x)        case PARAVIRT_PATCH(x): start = start_##x; end = end_##x; goto patch_site
-               SITE(irq_disable);
-               SITE(irq_enable);
-               SITE(restore_fl);
-               SITE(save_fl);
-               SITE(iret);
-               SITE(irq_enable_sysexit);
-               SITE(read_cr2);
-               SITE(read_cr3);
-               SITE(write_cr3);
-               SITE(clts);
-               SITE(read_tsc);
+#define SITE(ops, x)                                           \
+       case PARAVIRT_PATCH(ops.x):                             \
+               start = start_##ops##_##x;                      \
+               end = end_##ops##_##x;                          \
+               goto patch_site
+
+       SITE(pv_irq_ops, irq_disable);
+       SITE(pv_irq_ops, irq_enable);
+       SITE(pv_irq_ops, restore_fl);
+       SITE(pv_irq_ops, save_fl);
+       SITE(pv_cpu_ops, iret);
+       SITE(pv_cpu_ops, irq_enable_sysexit);
+       SITE(pv_mmu_ops, read_cr2);
+       SITE(pv_mmu_ops, read_cr3);
+       SITE(pv_mmu_ops, write_cr3);
+       SITE(pv_cpu_ops, clts);
+       SITE(pv_cpu_ops, read_tsc);
 #undef SITE
 
        patch_site:
                ret = paravirt_patch_insns(ibuf, len, start, end);
                break;
 
-       case PARAVIRT_PATCH(make_pgd):
-       case PARAVIRT_PATCH(make_pte):
-       case PARAVIRT_PATCH(pgd_val):
-       case PARAVIRT_PATCH(pte_val):
-#ifdef CONFIG_X86_PAE
-       case PARAVIRT_PATCH(make_pmd):
-       case PARAVIRT_PATCH(pmd_val):
-#endif
-               /* These functions end up returning exactly what
-                  they're passed, in the same registers. */
-               ret = paravirt_patch_nop();
-               break;
-
        default:
                ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
                break;
@@ -150,7 +143,7 @@ unsigned paravirt_patch_call(void *insnbuf,
        return 5;
 }
 
-unsigned paravirt_patch_jmp(const void *target, void *insnbuf,
+unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
                            unsigned long addr, unsigned len)
 {
        struct branch *b = insnbuf;
@@ -165,22 +158,37 @@ unsigned paravirt_patch_jmp(const void *target, void *insnbuf,
        return 5;
 }
 
+/* Neat trick to map patch type back to the call within the
+ * corresponding structure. */
+static void *get_call_destination(u8 type)
+{
+       struct paravirt_patch_template tmpl = {
+               .pv_init_ops = pv_init_ops,
+               .pv_time_ops = pv_time_ops,
+               .pv_cpu_ops = pv_cpu_ops,
+               .pv_irq_ops = pv_irq_ops,
+               .pv_apic_ops = pv_apic_ops,
+               .pv_mmu_ops = pv_mmu_ops,
+       };
+       return *((void **)&tmpl + type);
+}
+
 unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
                                unsigned long addr, unsigned len)
 {
-       void *opfunc = *((void **)&paravirt_ops + type);
+       void *opfunc = get_call_destination(type);
        unsigned ret;
 
        if (opfunc == NULL)
                /* If there's no function, patch it with a ud2a (BUG) */
-               ret = paravirt_patch_insns(insnbuf, len, start_ud2a, end_ud2a);
+               ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a));
        else if (opfunc == paravirt_nop)
                /* If the operation is a nop, then nop the callsite */
                ret = paravirt_patch_nop();
-       else if (type == PARAVIRT_PATCH(iret) ||
-                type == PARAVIRT_PATCH(irq_enable_sysexit))
+       else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
+                type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit))
                /* If operation requires a jmp, then jmp */
-               ret = paravirt_patch_jmp(opfunc, insnbuf, addr, len);
+               ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len);
        else
                /* Otherwise call the function; assume target could
                   clobber any caller-save reg */
@@ -205,7 +213,7 @@ unsigned paravirt_patch_insns(void *insnbuf, unsigned len,
 
 void init_IRQ(void)
 {
-       paravirt_ops.init_IRQ();
+       pv_irq_ops.init_IRQ();
 }
 
 static void native_flush_tlb(void)
@@ -233,7 +241,7 @@ extern void native_irq_enable_sysexit(void);
 
 static int __init print_banner(void)
 {
-       paravirt_ops.banner();
+       pv_init_ops.banner();
        return 0;
 }
 core_initcall(print_banner);
@@ -273,47 +281,96 @@ int paravirt_disable_iospace(void)
        return ret;
 }
 
-struct paravirt_ops paravirt_ops = {
+static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LAZY_NONE;
+
+static inline void enter_lazy(enum paravirt_lazy_mode mode)
+{
+       BUG_ON(x86_read_percpu(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE);
+       BUG_ON(preemptible());
+
+       x86_write_percpu(paravirt_lazy_mode, mode);
+}
+
+void paravirt_leave_lazy(enum paravirt_lazy_mode mode)
+{
+       BUG_ON(x86_read_percpu(paravirt_lazy_mode) != mode);
+       BUG_ON(preemptible());
+
+       x86_write_percpu(paravirt_lazy_mode, PARAVIRT_LAZY_NONE);
+}
+
+void paravirt_enter_lazy_mmu(void)
+{
+       enter_lazy(PARAVIRT_LAZY_MMU);
+}
+
+void paravirt_leave_lazy_mmu(void)
+{
+       paravirt_leave_lazy(PARAVIRT_LAZY_MMU);
+}
+
+void paravirt_enter_lazy_cpu(void)
+{
+       enter_lazy(PARAVIRT_LAZY_CPU);
+}
+
+void paravirt_leave_lazy_cpu(void)
+{
+       paravirt_leave_lazy(PARAVIRT_LAZY_CPU);
+}
+
+enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
+{
+       return x86_read_percpu(paravirt_lazy_mode);
+}
+
+struct pv_info pv_info = {
        .name = "bare hardware",
        .paravirt_enabled = 0,
        .kernel_rpl = 0,
        .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */
+};
 
-       .patch = native_patch,
+struct pv_init_ops pv_init_ops = {
+       .patch = native_patch,
        .banner = default_banner,
        .arch_setup = paravirt_nop,
        .memory_setup = machine_specific_memory_setup,
+};
+
+struct pv_time_ops pv_time_ops = {
+       .time_init = hpet_time_init,
        .get_wallclock = native_get_wallclock,
        .set_wallclock = native_set_wallclock,
-       .time_init = hpet_time_init,
+       .sched_clock = native_sched_clock,
+       .get_cpu_khz = native_calculate_cpu_khz,
+};
+
+struct pv_irq_ops pv_irq_ops = {
        .init_IRQ = native_init_IRQ,
+       .save_fl = native_save_fl,
+       .restore_fl = native_restore_fl,
+       .irq_disable = native_irq_disable,
+       .irq_enable = native_irq_enable,
+       .safe_halt = native_safe_halt,
+       .halt = native_halt,
+};
 
+struct pv_cpu_ops pv_cpu_ops = {
        .cpuid = native_cpuid,
        .get_debugreg = native_get_debugreg,
        .set_debugreg = native_set_debugreg,
        .clts = native_clts,
        .read_cr0 = native_read_cr0,
        .write_cr0 = native_write_cr0,
-       .read_cr2 = native_read_cr2,
-       .write_cr2 = native_write_cr2,
-       .read_cr3 = native_read_cr3,
-       .write_cr3 = native_write_cr3,
        .read_cr4 = native_read_cr4,
        .read_cr4_safe = native_read_cr4_safe,
        .write_cr4 = native_write_cr4,
-       .save_fl = native_save_fl,
-       .restore_fl = native_restore_fl,
-       .irq_disable = native_irq_disable,
-       .irq_enable = native_irq_enable,
-       .safe_halt = native_safe_halt,
-       .halt = native_halt,
        .wbinvd = native_wbinvd,
        .read_msr = native_read_msr_safe,
        .write_msr = native_write_msr_safe,
        .read_tsc = native_read_tsc,
        .read_pmc = native_read_pmc,
-       .sched_clock = native_sched_clock,
-       .get_cpu_khz = native_calculate_cpu_khz,
        .load_tr_desc = native_load_tr_desc,
        .set_ldt = native_set_ldt,
        .load_gdt = native_load_gdt,
@@ -327,9 +384,19 @@ struct paravirt_ops paravirt_ops = {
        .write_idt_entry = write_dt_entry,
        .load_esp0 = native_load_esp0,
 
+       .irq_enable_sysexit = native_irq_enable_sysexit,
+       .iret = native_iret,
+
        .set_iopl_mask = native_set_iopl_mask,
        .io_delay = native_io_delay,
 
+       .lazy_mode = {
+               .enter = paravirt_nop,
+               .leave = paravirt_nop,
+       },
+};
+
+struct pv_apic_ops pv_apic_ops = {
 #ifdef CONFIG_X86_LOCAL_APIC
        .apic_write = native_apic_write,
        .apic_write_atomic = native_apic_write_atomic,
@@ -338,11 +405,17 @@ struct paravirt_ops paravirt_ops = {
        .setup_secondary_clock = setup_secondary_APIC_clock,
        .startup_ipi_hook = paravirt_nop,
 #endif
-       .set_lazy_mode = paravirt_nop,
+};
 
+struct pv_mmu_ops pv_mmu_ops = {
        .pagetable_setup_start = native_pagetable_setup_start,
        .pagetable_setup_done = native_pagetable_setup_done,
 
+       .read_cr2 = native_read_cr2,
+       .write_cr2 = native_write_cr2,
+       .read_cr3 = native_read_cr3,
+       .write_cr3 = native_write_cr3,
+
        .flush_tlb_user = native_flush_tlb,
        .flush_tlb_kernel = native_flush_tlb_global,
        .flush_tlb_single = native_flush_tlb_single,
@@ -381,12 +454,19 @@ struct paravirt_ops paravirt_ops = {
        .make_pte = native_make_pte,
        .make_pgd = native_make_pgd,
 
-       .irq_enable_sysexit = native_irq_enable_sysexit,
-       .iret = native_iret,
-
        .dup_mmap = paravirt_nop,
        .exit_mmap = paravirt_nop,
        .activate_mm = paravirt_nop,
+
+       .lazy_mode = {
+               .enter = paravirt_nop,
+               .leave = paravirt_nop,
+       },
 };
 
-EXPORT_SYMBOL(paravirt_ops);
+EXPORT_SYMBOL_GPL(pv_time_ops);
+EXPORT_SYMBOL_GPL(pv_cpu_ops);
+EXPORT_SYMBOL_GPL(pv_mmu_ops);
+EXPORT_SYMBOL_GPL(pv_apic_ops);
+EXPORT_SYMBOL_GPL(pv_info);
+EXPORT_SYMBOL    (pv_irq_ops);
index 18673e0..f02bad6 100644 (file)
@@ -134,21 +134,21 @@ static unsigned vmi_patch(u8 type, u16 clobbers, void *insns,
                          unsigned long eip, unsigned len)
 {
        switch (type) {
-               case PARAVIRT_PATCH(irq_disable):
+               case PARAVIRT_PATCH(pv_irq_ops.irq_disable):
                        return patch_internal(VMI_CALL_DisableInterrupts, len,
                                              insns, eip);
-               case PARAVIRT_PATCH(irq_enable):
+               case PARAVIRT_PATCH(pv_irq_ops.irq_enable):
                        return patch_internal(VMI_CALL_EnableInterrupts, len,
                                              insns, eip);
-               case PARAVIRT_PATCH(restore_fl):
+               case PARAVIRT_PATCH(pv_irq_ops.restore_fl):
                        return patch_internal(VMI_CALL_SetInterruptMask, len,
                                              insns, eip);
-               case PARAVIRT_PATCH(save_fl):
+               case PARAVIRT_PATCH(pv_irq_ops.save_fl):
                        return patch_internal(VMI_CALL_GetInterruptMask, len,
                                              insns, eip);
-               case PARAVIRT_PATCH(iret):
+               case PARAVIRT_PATCH(pv_cpu_ops.iret):
                        return patch_internal(VMI_CALL_IRET, len, insns, eip);
-               case PARAVIRT_PATCH(irq_enable_sysexit):
+               case PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit):
                        return patch_internal(VMI_CALL_SYSEXIT, len, insns, eip);
                default:
                        break;
@@ -552,24 +552,22 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip,
 }
 #endif
 
-static void vmi_set_lazy_mode(enum paravirt_lazy_mode mode)
+static void vmi_enter_lazy_cpu(void)
 {
-       static DEFINE_PER_CPU(enum paravirt_lazy_mode, lazy_mode);
-
-       if (!vmi_ops.set_lazy_mode)
-               return;
+       paravirt_enter_lazy_cpu();
+       vmi_ops.set_lazy_mode(2);
+}
 
-       /* Modes should never nest or overlap */
-       BUG_ON(__get_cpu_var(lazy_mode) && !(mode == PARAVIRT_LAZY_NONE ||
-                                            mode == PARAVIRT_LAZY_FLUSH));
+static void vmi_enter_lazy_mmu(void)
+{
+       paravirt_enter_lazy_mmu();
+       vmi_ops.set_lazy_mode(1);
+}
 
-       if (mode == PARAVIRT_LAZY_FLUSH) {
-               vmi_ops.set_lazy_mode(0);
-               vmi_ops.set_lazy_mode(__get_cpu_var(lazy_mode));
-       } else {
-               vmi_ops.set_lazy_mode(mode);
-               __get_cpu_var(lazy_mode) = mode;
-       }
+static void vmi_leave_lazy(void)
+{
+       paravirt_leave_lazy(paravirt_get_lazy_mode());
+       vmi_ops.set_lazy_mode(0);
 }
 
 static inline int __init check_vmi_rom(struct vrom_header *rom)
@@ -690,9 +688,9 @@ do {                                                                \
        reloc = call_vrom_long_func(vmi_rom, get_reloc,         \
                                    VMI_CALL_##vmicall);        \
        if (rel->type == VMI_RELOCATION_CALL_REL)               \
-               paravirt_ops.opname = (void *)rel->eip;         \
+               opname = (void *)rel->eip;                      \
        else if (rel->type == VMI_RELOCATION_NOP)               \
-               paravirt_ops.opname = (void *)vmi_nop;          \
+               opname = (void *)vmi_nop;                       \
        else if (rel->type != VMI_RELOCATION_NONE)              \
                printk(KERN_WARNING "VMI: Unknown relocation "  \
                                    "type %d for " #vmicall"\n",\
@@ -712,7 +710,7 @@ do {                                                                \
                                    VMI_CALL_##vmicall);        \
        BUG_ON(rel->type == VMI_RELOCATION_JUMP_REL);           \
        if (rel->type == VMI_RELOCATION_CALL_REL) {             \
-               paravirt_ops.opname = wrapper;                  \
+               opname = wrapper;                               \
                vmi_ops.cache = (void *)rel->eip;               \
        }                                                       \
 } while (0)
@@ -732,11 +730,11 @@ static inline int __init activate_vmi(void)
        }
        savesegment(cs, kernel_cs);
 
-       paravirt_ops.paravirt_enabled = 1;
-       paravirt_ops.kernel_rpl = kernel_cs & SEGMENT_RPL_MASK;
+       pv_info.paravirt_enabled = 1;
+       pv_info.kernel_rpl = kernel_cs & SEGMENT_RPL_MASK;
+       pv_info.name = "vmi";
 
-       paravirt_ops.patch = vmi_patch;
-       paravirt_ops.name = "vmi";
+       pv_init_ops.patch = vmi_patch;
 
        /*
         * Many of these operations are ABI compatible with VMI.
@@ -754,26 +752,26 @@ static inline int __init activate_vmi(void)
         */
 
        /* CPUID is special, so very special it gets wrapped like a present */
-       para_wrap(cpuid, vmi_cpuid, cpuid, CPUID);
-
-       para_fill(clts, CLTS);
-       para_fill(get_debugreg, GetDR);
-       para_fill(set_debugreg, SetDR);
-       para_fill(read_cr0, GetCR0);
-       para_fill(read_cr2, GetCR2);
-       para_fill(read_cr3, GetCR3);
-       para_fill(read_cr4, GetCR4);
-       para_fill(write_cr0, SetCR0);
-       para_fill(write_cr2, SetCR2);
-       para_fill(write_cr3, SetCR3);
-       para_fill(write_cr4, SetCR4);
-       para_fill(save_fl, GetInterruptMask);
-       para_fill(restore_fl, SetInterruptMask);
-       para_fill(irq_disable, DisableInterrupts);
-       para_fill(irq_enable, EnableInterrupts);
-
-       para_fill(wbinvd, WBINVD);
-       para_fill(read_tsc, RDTSC);
+       para_wrap(pv_cpu_ops.cpuid, vmi_cpuid, cpuid, CPUID);
+
+       para_fill(pv_cpu_ops.clts, CLTS);
+       para_fill(pv_cpu_ops.get_debugreg, GetDR);
+       para_fill(pv_cpu_ops.set_debugreg, SetDR);
+       para_fill(pv_cpu_ops.read_cr0, GetCR0);
+       para_fill(pv_mmu_ops.read_cr2, GetCR2);
+       para_fill(pv_mmu_ops.read_cr3, GetCR3);
+       para_fill(pv_cpu_ops.read_cr4, GetCR4);
+       para_fill(pv_cpu_ops.write_cr0, SetCR0);
+       para_fill(pv_mmu_ops.write_cr2, SetCR2);
+       para_fill(pv_mmu_ops.write_cr3, SetCR3);
+       para_fill(pv_cpu_ops.write_cr4, SetCR4);
+       para_fill(pv_irq_ops.save_fl, GetInterruptMask);
+       para_fill(pv_irq_ops.restore_fl, SetInterruptMask);
+       para_fill(pv_irq_ops.irq_disable, DisableInterrupts);
+       para_fill(pv_irq_ops.irq_enable, EnableInterrupts);
+
+       para_fill(pv_cpu_ops.wbinvd, WBINVD);
+       para_fill(pv_cpu_ops.read_tsc, RDTSC);
 
        /* The following we emulate with trap and emulate for now */
        /* paravirt_ops.read_msr = vmi_rdmsr */
@@ -781,29 +779,38 @@ static inline int __init activate_vmi(void)
        /* paravirt_ops.rdpmc = vmi_rdpmc */
 
        /* TR interface doesn't pass TR value, wrap */
-       para_wrap(load_tr_desc, vmi_set_tr, set_tr, SetTR);
+       para_wrap(pv_cpu_ops.load_tr_desc, vmi_set_tr, set_tr, SetTR);
 
        /* LDT is special, too */
-       para_wrap(set_ldt, vmi_set_ldt, _set_ldt, SetLDT);
-
-       para_fill(load_gdt, SetGDT);
-       para_fill(load_idt, SetIDT);
-       para_fill(store_gdt, GetGDT);
-       para_fill(store_idt, GetIDT);
-       para_fill(store_tr, GetTR);
-       paravirt_ops.load_tls = vmi_load_tls;
-       para_fill(write_ldt_entry, WriteLDTEntry);
-       para_fill(write_gdt_entry, WriteGDTEntry);
-       para_fill(write_idt_entry, WriteIDTEntry);
-       para_wrap(load_esp0, vmi_load_esp0, set_kernel_stack, UpdateKernelStack);
-       para_fill(set_iopl_mask, SetIOPLMask);
-       para_fill(io_delay, IODelay);
-       para_wrap(set_lazy_mode, vmi_set_lazy_mode, set_lazy_mode, SetLazyMode);
+       para_wrap(pv_cpu_ops.set_ldt, vmi_set_ldt, _set_ldt, SetLDT);
+
+       para_fill(pv_cpu_ops.load_gdt, SetGDT);
+       para_fill(pv_cpu_ops.load_idt, SetIDT);
+       para_fill(pv_cpu_ops.store_gdt, GetGDT);
+       para_fill(pv_cpu_ops.store_idt, GetIDT);
+       para_fill(pv_cpu_ops.store_tr, GetTR);
+       pv_cpu_ops.load_tls = vmi_load_tls;
+       para_fill(pv_cpu_ops.write_ldt_entry, WriteLDTEntry);
+       para_fill(pv_cpu_ops.write_gdt_entry, WriteGDTEntry);
+       para_fill(pv_cpu_ops.write_idt_entry, WriteIDTEntry);
+       para_wrap(pv_cpu_ops.load_esp0, vmi_load_esp0, set_kernel_stack, UpdateKernelStack);
+       para_fill(pv_cpu_ops.set_iopl_mask, SetIOPLMask);
+       para_fill(pv_cpu_ops.io_delay, IODelay);
+
+       para_wrap(pv_cpu_ops.lazy_mode.enter, vmi_enter_lazy_cpu,
+                 set_lazy_mode, SetLazyMode);
+       para_wrap(pv_cpu_ops.lazy_mode.leave, vmi_leave_lazy,
+                 set_lazy_mode, SetLazyMode);
+
+       para_wrap(pv_mmu_ops.lazy_mode.enter, vmi_enter_lazy_mmu,
+                 set_lazy_mode, SetLazyMode);
+       para_wrap(pv_mmu_ops.lazy_mode.leave, vmi_leave_lazy,
+                 set_lazy_mode, SetLazyMode);
 
        /* user and kernel flush are just handled with different flags to FlushTLB */
-       para_wrap(flush_tlb_user, vmi_flush_tlb_user, _flush_tlb, FlushTLB);
-       para_wrap(flush_tlb_kernel, vmi_flush_tlb_kernel, _flush_tlb, FlushTLB);
-       para_fill(flush_tlb_single, InvalPage);
+       para_wrap(pv_mmu_ops.flush_tlb_user, vmi_flush_tlb_user, _flush_tlb, FlushTLB);
+       para_wrap(pv_mmu_ops.flush_tlb_kernel, vmi_flush_tlb_kernel, _flush_tlb, FlushTLB);
+       para_fill(pv_mmu_ops.flush_tlb_single, InvalPage);
 
        /*
         * Until a standard flag format can be agreed on, we need to
@@ -819,41 +826,41 @@ static inline int __init activate_vmi(void)
 #endif
 
        if (vmi_ops.set_pte) {
-               paravirt_ops.set_pte = vmi_set_pte;
-               paravirt_ops.set_pte_at = vmi_set_pte_at;
-               paravirt_ops.set_pmd = vmi_set_pmd;
+               pv_mmu_ops.set_pte = vmi_set_pte;
+               pv_mmu_ops.set_pte_at = vmi_set_pte_at;
+               pv_mmu_ops.set_pmd = vmi_set_pmd;
 #ifdef CONFIG_X86_PAE
-               paravirt_ops.set_pte_atomic = vmi_set_pte_atomic;
-               paravirt_ops.set_pte_present = vmi_set_pte_present;
-               paravirt_ops.set_pud = vmi_set_pud;
-               paravirt_ops.pte_clear = vmi_pte_clear;
-               paravirt_ops.pmd_clear = vmi_pmd_clear;
+               pv_mmu_ops.set_pte_atomic = vmi_set_pte_atomic;
+               pv_mmu_ops.set_pte_present = vmi_set_pte_present;
+               pv_mmu_ops.set_pud = vmi_set_pud;
+               pv_mmu_ops.pte_clear = vmi_pte_clear;
+               pv_mmu_ops.pmd_clear = vmi_pmd_clear;
 #endif
        }
 
        if (vmi_ops.update_pte) {
-               paravirt_ops.pte_update = vmi_update_pte;
-               paravirt_ops.pte_update_defer = vmi_update_pte_defer;
+               pv_mmu_ops.pte_update = vmi_update_pte;
+               pv_mmu_ops.pte_update_defer = vmi_update_pte_defer;
        }
 
        vmi_ops.allocate_page = vmi_get_function(VMI_CALL_AllocatePage);
        if (vmi_ops.allocate_page) {
-               paravirt_ops.alloc_pt = vmi_allocate_pt;
-               paravirt_ops.alloc_pd = vmi_allocate_pd;
-               paravirt_ops.alloc_pd_clone = vmi_allocate_pd_clone;
+               pv_mmu_ops.alloc_pt = vmi_allocate_pt;
+               pv_mmu_ops.alloc_pd = vmi_allocate_pd;
+               pv_mmu_ops.alloc_pd_clone = vmi_allocate_pd_clone;
        }
 
        vmi_ops.release_page = vmi_get_function(VMI_CALL_ReleasePage);
        if (vmi_ops.release_page) {
-               paravirt_ops.release_pt = vmi_release_pt;
-               paravirt_ops.release_pd = vmi_release_pd;
+               pv_mmu_ops.release_pt = vmi_release_pt;
+               pv_mmu_ops.release_pd = vmi_release_pd;
        }
 
        /* Set linear is needed in all cases */
        vmi_ops.set_linear_mapping = vmi_get_function(VMI_CALL_SetLinearMapping);
 #ifdef CONFIG_HIGHPTE
        if (vmi_ops.set_linear_mapping)
-               paravirt_ops.kmap_atomic_pte = vmi_kmap_atomic_pte;
+               pv_mmu_ops.kmap_atomic_pte = vmi_kmap_atomic_pte;
 #endif
 
        /*
@@ -863,17 +870,17 @@ static inline int __init activate_vmi(void)
         * the backend.  They are performance critical anyway, so requiring
         * a patch is not a big problem.
         */
-       paravirt_ops.irq_enable_sysexit = (void *)0xfeedbab0;
-       paravirt_ops.iret = (void *)0xbadbab0;
+       pv_cpu_ops.irq_enable_sysexit = (void *)0xfeedbab0;
+       pv_cpu_ops.iret = (void *)0xbadbab0;
 
 #ifdef CONFIG_SMP
-       para_wrap(startup_ipi_hook, vmi_startup_ipi_hook, set_initial_ap_state, SetInitialAPState);
+       para_wrap(pv_apic_ops.startup_ipi_hook, vmi_startup_ipi_hook, set_initial_ap_state, SetInitialAPState);
 #endif
 
 #ifdef CONFIG_X86_LOCAL_APIC
-       para_fill(apic_read, APICRead);
-       para_fill(apic_write, APICWrite);
-       para_fill(apic_write_atomic, APICWrite);
+       para_fill(pv_apic_ops.apic_read, APICRead);
+       para_fill(pv_apic_ops.apic_write, APICWrite);
+       para_fill(pv_apic_ops.apic_write_atomic, APICWrite);
 #endif
 
        /*
@@ -891,15 +898,15 @@ static inline int __init activate_vmi(void)
                vmi_timer_ops.set_alarm = vmi_get_function(VMI_CALL_SetAlarm);
                vmi_timer_ops.cancel_alarm =
                         vmi_get_function(VMI_CALL_CancelAlarm);
-               paravirt_ops.time_init = vmi_time_init;
-               paravirt_ops.get_wallclock = vmi_get_wallclock;
-               paravirt_ops.set_wallclock = vmi_set_wallclock;
+               pv_time_ops.time_init = vmi_time_init;
+               pv_time_ops.get_wallclock = vmi_get_wallclock;
+               pv_time_ops.set_wallclock = vmi_set_wallclock;
 #ifdef CONFIG_X86_LOCAL_APIC
-               paravirt_ops.setup_boot_clock = vmi_time_bsp_init;
-               paravirt_ops.setup_secondary_clock = vmi_time_ap_init;
+               pv_apic_ops.setup_boot_clock = vmi_time_bsp_init;
+               pv_apic_ops.setup_secondary_clock = vmi_time_ap_init;
 #endif
-               paravirt_ops.sched_clock = vmi_sched_clock;
-               paravirt_ops.get_cpu_khz = vmi_cpu_khz;
+               pv_time_ops.sched_clock = vmi_sched_clock;
+               pv_time_ops.get_cpu_khz = vmi_cpu_khz;
 
                /* We have true wallclock functions; disable CMOS clock sync */
                no_sync_cmos_clock = 1;
@@ -908,7 +915,7 @@ static inline int __init activate_vmi(void)
                disable_vmi_timer = 1;
        }
 
-       para_fill(safe_halt, Halt);
+       para_fill(pv_irq_ops.safe_halt, Halt);
 
        /*
         * Alternative instruction rewriting doesn't happen soon enough
index e4e37d4..c7d1947 100644 (file)
@@ -748,24 +748,12 @@ struct kmem_cache *pmd_cache;
 
 void __init pgtable_cache_init(void)
 {
-       size_t pgd_size = PTRS_PER_PGD*sizeof(pgd_t);
-
-       if (PTRS_PER_PMD > 1) {
+       if (PTRS_PER_PMD > 1)
                pmd_cache = kmem_cache_create("pmd",
-                                       PTRS_PER_PMD*sizeof(pmd_t),
-                                       PTRS_PER_PMD*sizeof(pmd_t),
-                                       SLAB_PANIC,
-                                       pmd_ctor);
-               if (!SHARED_KERNEL_PMD) {
-                       /* If we're in PAE mode and have a non-shared
-                          kernel pmd, then the pgd size must be a
-                          page size.  This is because the pgd_list
-                          links through the page structure, so there
-                          can only be one pgd per page for this to
-                          work. */
-                       pgd_size = PAGE_SIZE;
-               }
-       }
+                                             PTRS_PER_PMD*sizeof(pmd_t),
+                                             PTRS_PER_PMD*sizeof(pmd_t),
+                                             SLAB_PANIC,
+                                             pmd_ctor);
 }
 
 /*
index 265f7dd..94c39aa 100644 (file)
 
 EXPORT_SYMBOL_GPL(hypercall_page);
 
-DEFINE_PER_CPU(enum paravirt_lazy_mode, xen_lazy_mode);
-
 DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
 DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
-DEFINE_PER_CPU(unsigned long, xen_cr3);
+
+/*
+ * Note about cr3 (pagetable base) values:
+ *
+ * xen_cr3 contains the current logical cr3 value; it contains the
+ * last set cr3.  This may not be the current effective cr3, because
+ * its update may be being lazily deferred.  However, a vcpu looking
+ * at its own cr3 can use this value knowing that everything will
+ * be self-consistent.
+ *
+ * xen_current_cr3 contains the actual vcpu cr3; it is set once the
+ * hypercall to set the vcpu cr3 is complete (so it may be a little
+ * out of date, but it will never be set early).  If one vcpu is
+ * looking at another vcpu's cr3 value, it should use this variable.
+ */
+DEFINE_PER_CPU(unsigned long, xen_cr3);         /* cr3 stored as physaddr */
+DEFINE_PER_CPU(unsigned long, xen_current_cr3);         /* actual vcpu cr3 */
 
 struct start_info *xen_start_info;
 EXPORT_SYMBOL_GPL(xen_start_info);
@@ -99,7 +113,7 @@ static void __init xen_vcpu_setup(int cpu)
        info.mfn = virt_to_mfn(vcpup);
        info.offset = offset_in_page(vcpup);
 
-       printk(KERN_DEBUG "trying to map vcpu_info %d at %p, mfn %x, offset %d\n",
+       printk(KERN_DEBUG "trying to map vcpu_info %d at %p, mfn %llx, offset %d\n",
               cpu, vcpup, info.mfn, info.offset);
 
        /* Check to see if the hypervisor will put the vcpu_info
@@ -123,7 +137,7 @@ static void __init xen_vcpu_setup(int cpu)
 static void __init xen_banner(void)
 {
        printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
-              paravirt_ops.name);
+              pv_info.name);
        printk(KERN_INFO "Hypervisor signature: %s\n", xen_start_info->magic);
 }
 
@@ -248,29 +262,10 @@ static void xen_halt(void)
                xen_safe_halt();
 }
 
-static void xen_set_lazy_mode(enum paravirt_lazy_mode mode)
+static void xen_leave_lazy(void)
 {
-       BUG_ON(preemptible());
-
-       switch (mode) {
-       case PARAVIRT_LAZY_NONE:
-               BUG_ON(x86_read_percpu(xen_lazy_mode) == PARAVIRT_LAZY_NONE);
-               break;
-
-       case PARAVIRT_LAZY_MMU:
-       case PARAVIRT_LAZY_CPU:
-               BUG_ON(x86_read_percpu(xen_lazy_mode) != PARAVIRT_LAZY_NONE);
-               break;
-
-       case PARAVIRT_LAZY_FLUSH:
-               /* flush if necessary, but don't change state */
-               if (x86_read_percpu(xen_lazy_mode) != PARAVIRT_LAZY_NONE)
-                       xen_mc_flush();
-               return;
-       }
-
+       paravirt_leave_lazy(paravirt_get_lazy_mode());
        xen_mc_flush();
-       x86_write_percpu(xen_lazy_mode, mode);
 }
 
 static unsigned long xen_store_tr(void)
@@ -357,7 +352,7 @@ static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
         * loaded properly.  This will go away as soon as Xen has been
         * modified to not save/restore %gs for normal hypercalls.
         */
-       if (xen_get_lazy_mode() == PARAVIRT_LAZY_CPU)
+       if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)
                loadsegment(gs, 0);
 }
 
@@ -631,32 +626,36 @@ static unsigned long xen_read_cr3(void)
        return x86_read_percpu(xen_cr3);
 }
 
+static void set_current_cr3(void *v)
+{
+       x86_write_percpu(xen_current_cr3, (unsigned long)v);
+}
+
 static void xen_write_cr3(unsigned long cr3)
 {
+       struct mmuext_op *op;
+       struct multicall_space mcs;
+       unsigned long mfn = pfn_to_mfn(PFN_DOWN(cr3));
+
        BUG_ON(preemptible());
 
-       if (cr3 == x86_read_percpu(xen_cr3)) {
-               /* just a simple tlb flush */
-               xen_flush_tlb();
-               return;
-       }
+       mcs = xen_mc_entry(sizeof(*op));  /* disables interrupts */
 
+       /* Update while interrupts are disabled, so it's atomic with
+          respect to IPIs */
        x86_write_percpu(xen_cr3, cr3);
 
+       op = mcs.args;
+       op->cmd = MMUEXT_NEW_BASEPTR;
+       op->arg1.mfn = mfn;
 
-       {
-               struct mmuext_op *op;
-               struct multicall_space mcs = xen_mc_entry(sizeof(*op));
-               unsigned long mfn = pfn_to_mfn(PFN_DOWN(cr3));
-
-               op = mcs.args;
-               op->cmd = MMUEXT_NEW_BASEPTR;
-               op->arg1.mfn = mfn;
+       MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
 
-               MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
+       /* Update xen_current_cr3 once the batch has actually
+          been submitted. */
+       xen_mc_callback(set_current_cr3, (void *)cr3);
 
-               xen_mc_issue(PARAVIRT_LAZY_CPU);
-       }
+       xen_mc_issue(PARAVIRT_LAZY_CPU);  /* interrupts restored */
 }
 
 /* Early in boot, while setting up the initial pagetable, assume
@@ -667,6 +666,15 @@ static __init void xen_alloc_pt_init(struct mm_struct *mm, u32 pfn)
        make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
 }
 
+static void pin_pagetable_pfn(unsigned level, unsigned long pfn)
+{
+       struct mmuext_op op;
+       op.cmd = level;
+       op.arg1.mfn = pfn_to_mfn(pfn);
+       if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
+               BUG();
+}
+
 /* This needs to make sure the new pte page is pinned iff its being
    attached to a pinned pagetable. */
 static void xen_alloc_pt(struct mm_struct *mm, u32 pfn)
@@ -676,9 +684,10 @@ static void xen_alloc_pt(struct mm_struct *mm, u32 pfn)
        if (PagePinned(virt_to_page(mm->pgd))) {
                SetPagePinned(page);
 
-               if (!PageHighMem(page))
+               if (!PageHighMem(page)) {
                        make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
-               else
+                       pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
+               } else
                        /* make sure there are no stray mappings of
                           this page */
                        kmap_flush_unused();
@@ -691,8 +700,10 @@ static void xen_release_pt(u32 pfn)
        struct page *page = pfn_to_page(pfn);
 
        if (PagePinned(page)) {
-               if (!PageHighMem(page))
+               if (!PageHighMem(page)) {
+                       pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
                        make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
+               }
        }
 }
 
@@ -737,7 +748,7 @@ static __init void xen_pagetable_setup_start(pgd_t *base)
        pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base;
 
        /* special set_pte for pagetable initialization */
-       paravirt_ops.set_pte = xen_set_pte_init;
+       pv_mmu_ops.set_pte = xen_set_pte_init;
 
        init_mm.pgd = base;
        /*
@@ -784,8 +795,8 @@ static __init void xen_pagetable_setup_done(pgd_t *base)
 {
        /* This will work as long as patching hasn't happened yet
           (which it hasn't) */
-       paravirt_ops.alloc_pt = xen_alloc_pt;
-       paravirt_ops.set_pte = xen_set_pte;
+       pv_mmu_ops.alloc_pt = xen_alloc_pt;
+       pv_mmu_ops.set_pte = xen_set_pte;
 
        if (!xen_feature(XENFEAT_auto_translated_physmap)) {
                /*
@@ -807,15 +818,15 @@ static __init void xen_pagetable_setup_done(pgd_t *base)
        /* Actually pin the pagetable down, but we can't set PG_pinned
           yet because the page structures don't exist yet. */
        {
-               struct mmuext_op op;
+               unsigned level;
+
 #ifdef CONFIG_X86_PAE
-               op.cmd = MMUEXT_PIN_L3_TABLE;
+               level = MMUEXT_PIN_L3_TABLE;
 #else
-               op.cmd = MMUEXT_PIN_L3_TABLE;
+               level = MMUEXT_PIN_L2_TABLE;
 #endif
-               op.arg1.mfn = pfn_to_mfn(PFN_DOWN(__pa(base)));
-               if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
-                       BUG();
+
+               pin_pagetable_pfn(level, PFN_DOWN(__pa(base)));
        }
 }
 
@@ -832,12 +843,12 @@ void __init xen_setup_vcpu_info_placement(void)
        if (have_vcpu_info_placement) {
                printk(KERN_INFO "Xen: using vcpu_info placement\n");
 
-               paravirt_ops.save_fl = xen_save_fl_direct;
-               paravirt_ops.restore_fl = xen_restore_fl_direct;
-               paravirt_ops.irq_disable = xen_irq_disable_direct;
-               paravirt_ops.irq_enable = xen_irq_enable_direct;
-               paravirt_ops.read_cr2 = xen_read_cr2_direct;
-               paravirt_ops.iret = xen_iret_direct;
+               pv_irq_ops.save_fl = xen_save_fl_direct;
+               pv_irq_ops.restore_fl = xen_restore_fl_direct;
+               pv_irq_ops.irq_disable = xen_irq_disable_direct;
+               pv_irq_ops.irq_enable = xen_irq_enable_direct;
+               pv_mmu_ops.read_cr2 = xen_read_cr2_direct;
+               pv_cpu_ops.iret = xen_iret_direct;
        }
 }
 
@@ -849,8 +860,8 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
 
        start = end = reloc = NULL;
 
-#define SITE(x)                                                                \
-       case PARAVIRT_PATCH(x):                                         \
+#define SITE(op, x)                                                    \
+       case PARAVIRT_PATCH(op.x):                                      \
        if (have_vcpu_info_placement) {                                 \
                start = (char *)xen_##x##_direct;                       \
                end = xen_##x##_direct_end;                             \
@@ -859,10 +870,10 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
        goto patch_site
 
        switch (type) {
-               SITE(irq_enable);
-               SITE(irq_disable);
-               SITE(save_fl);
-               SITE(restore_fl);
+               SITE(pv_irq_ops, irq_enable);
+               SITE(pv_irq_ops, irq_disable);
+               SITE(pv_irq_ops, save_fl);
+               SITE(pv_irq_ops, restore_fl);
 #undef SITE
 
        patch_site:
@@ -894,26 +905,32 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
        return ret;
 }
 
-static const struct paravirt_ops xen_paravirt_ops __initdata = {
+static const struct pv_info xen_info __initdata = {
        .paravirt_enabled = 1,
        .shared_kernel_pmd = 0,
 
        .name = "Xen",
-       .banner = xen_banner,
+};
 
+static const struct pv_init_ops xen_init_ops __initdata = {
        .patch = xen_patch,
 
+       .banner = xen_banner,
        .memory_setup = xen_memory_setup,
        .arch_setup = xen_arch_setup,
-       .init_IRQ = xen_init_IRQ,
        .post_allocator_init = xen_mark_init_mm_pinned,
+};
 
+static const struct pv_time_ops xen_time_ops __initdata = {
        .time_init = xen_time_init,
+
        .set_wallclock = xen_set_wallclock,
        .get_wallclock = xen_get_wallclock,
        .get_cpu_khz = xen_cpu_khz,
        .sched_clock = xen_sched_clock,
+};
 
+static const struct pv_cpu_ops xen_cpu_ops __initdata = {
        .cpuid = xen_cpuid,
 
        .set_debugreg = xen_set_debugreg,
@@ -924,22 +941,10 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = {
        .read_cr0 = native_read_cr0,
        .write_cr0 = native_write_cr0,
 
-       .read_cr2 = xen_read_cr2,
-       .write_cr2 = xen_write_cr2,
-
-       .read_cr3 = xen_read_cr3,
-       .write_cr3 = xen_write_cr3,
-
        .read_cr4 = native_read_cr4,
        .read_cr4_safe = native_read_cr4_safe,
        .write_cr4 = xen_write_cr4,
 
-       .save_fl = xen_save_fl,
-       .restore_fl = xen_restore_fl,
-       .irq_disable = xen_irq_disable,
-       .irq_enable = xen_irq_enable,
-       .safe_halt = xen_safe_halt,
-       .halt = xen_halt,
        .wbinvd = native_wbinvd,
 
        .read_msr = native_read_msr_safe,
@@ -968,6 +973,23 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = {
        .set_iopl_mask = xen_set_iopl_mask,
        .io_delay = xen_io_delay,
 
+       .lazy_mode = {
+               .enter = paravirt_enter_lazy_cpu,
+               .leave = xen_leave_lazy,
+       },
+};
+
+static const struct pv_irq_ops xen_irq_ops __initdata = {
+       .init_IRQ = xen_init_IRQ,
+       .save_fl = xen_save_fl,
+       .restore_fl = xen_restore_fl,
+       .irq_disable = xen_irq_disable,
+       .irq_enable = xen_irq_enable,
+       .safe_halt = xen_safe_halt,
+       .halt = xen_halt,
+};
+
+static const struct pv_apic_ops xen_apic_ops __initdata = {
 #ifdef CONFIG_X86_LOCAL_APIC
        .apic_write = xen_apic_write,
        .apic_write_atomic = xen_apic_write,
@@ -976,6 +998,17 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = {
        .setup_secondary_clock = paravirt_nop,
        .startup_ipi_hook = paravirt_nop,
 #endif
+};
+
+static const struct pv_mmu_ops xen_mmu_ops __initdata = {
+       .pagetable_setup_start = xen_pagetable_setup_start,
+       .pagetable_setup_done = xen_pagetable_setup_done,
+
+       .read_cr2 = xen_read_cr2,
+       .write_cr2 = xen_write_cr2,
+
+       .read_cr3 = xen_read_cr3,
+       .write_cr3 = xen_write_cr3,
 
        .flush_tlb_user = xen_flush_tlb,
        .flush_tlb_kernel = xen_flush_tlb,
@@ -985,9 +1018,6 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = {
        .pte_update = paravirt_nop,
        .pte_update_defer = paravirt_nop,
 
-       .pagetable_setup_start = xen_pagetable_setup_start,
-       .pagetable_setup_done = xen_pagetable_setup_done,
-
        .alloc_pt = xen_alloc_pt_init,
        .release_pt = xen_release_pt,
        .alloc_pd = paravirt_nop,
@@ -1023,7 +1053,10 @@ static const struct paravirt_ops xen_paravirt_ops __initdata = {
        .dup_mmap = xen_dup_mmap,
        .exit_mmap = xen_exit_mmap,
 
-       .set_lazy_mode = xen_set_lazy_mode,
+       .lazy_mode = {
+               .enter = paravirt_enter_lazy_mmu,
+               .leave = xen_leave_lazy,
+       },
 };
 
 #ifdef CONFIG_SMP
@@ -1079,6 +1112,17 @@ static const struct machine_ops __initdata xen_machine_ops = {
 };
 
 
+static void __init xen_reserve_top(void)
+{
+       unsigned long top = HYPERVISOR_VIRT_START;
+       struct xen_platform_parameters pp;
+
+       if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0)
+               top = pp.virt_start;
+
+       reserve_top_address(-top + 2 * PAGE_SIZE);
+}
+
 /* First C function to be called on Xen boot */
 asmlinkage void __init xen_start_kernel(void)
 {
@@ -1090,7 +1134,14 @@ asmlinkage void __init xen_start_kernel(void)
        BUG_ON(memcmp(xen_start_info->magic, "xen-3.0", 7) != 0);
 
        /* Install Xen paravirt ops */
-       paravirt_ops = xen_paravirt_ops;
+       pv_info = xen_info;
+       pv_init_ops = xen_init_ops;
+       pv_time_ops = xen_time_ops;
+       pv_cpu_ops = xen_cpu_ops;
+       pv_irq_ops = xen_irq_ops;
+       pv_apic_ops = xen_apic_ops;
+       pv_mmu_ops = xen_mmu_ops;
+
        machine_ops = xen_machine_ops;
 
 #ifdef CONFIG_SMP
@@ -1112,6 +1163,7 @@ asmlinkage void __init xen_start_kernel(void)
        /* keep using Xen gdt for now; no urgent need to change it */
 
        x86_write_percpu(xen_cr3, __pa(pgd));
+       x86_write_percpu(xen_current_cr3, __pa(pgd));
 
 #ifdef CONFIG_SMP
        /* Don't do the full vcpu_info placement stuff until we have a
@@ -1123,12 +1175,12 @@ asmlinkage void __init xen_start_kernel(void)
        xen_setup_vcpu_info_placement();
 #endif
 
-       paravirt_ops.kernel_rpl = 1;
+       pv_info.kernel_rpl = 1;
        if (xen_feature(XENFEAT_supervisor_mode_kernel))
-               paravirt_ops.kernel_rpl = 0;
+               pv_info.kernel_rpl = 0;
 
        /* set the limit of our address space */
-       reserve_top_address(-HYPERVISOR_VIRT_START + 2 * PAGE_SIZE);
+       xen_reserve_top();
 
        /* set up basic CPUID stuff */
        cpu_detect(&new_cpu_data);
index 0bb7f00..b2e32f9 100644 (file)
@@ -154,7 +154,7 @@ void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
                    pte_t *ptep, pte_t pteval)
 {
        if (mm == current->mm || mm == &init_mm) {
-               if (xen_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
+               if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
                        struct multicall_space mcs;
                        mcs = xen_mc_entry(0);
 
@@ -303,7 +303,12 @@ pgd_t xen_make_pgd(unsigned long pgd)
 }
 #endif /* CONFIG_X86_PAE */
 
-
+enum pt_level {
+       PT_PGD,
+       PT_PUD,
+       PT_PMD,
+       PT_PTE
+};
 
 /*
   (Yet another) pagetable walker.  This one is intended for pinning a
@@ -315,7 +320,7 @@ pgd_t xen_make_pgd(unsigned long pgd)
   FIXADDR_TOP.  But the important bit is that we don't pin beyond
   there, because then we start getting into Xen's ptes.
 */
-static int pgd_walk(pgd_t *pgd_base, int (*func)(struct page *, unsigned),
+static int pgd_walk(pgd_t *pgd_base, int (*func)(struct page *, enum pt_level),
                    unsigned long limit)
 {
        pgd_t *pgd = pgd_base;
@@ -340,7 +345,7 @@ static int pgd_walk(pgd_t *pgd_base, int (*func)(struct page *, unsigned),
                pud = pud_offset(pgd, 0);
 
                if (PTRS_PER_PUD > 1) /* not folded */
-                       flush |= (*func)(virt_to_page(pud), 0);
+                       flush |= (*func)(virt_to_page(pud), PT_PUD);
 
                for (; addr != pud_limit; pud++, addr = pud_next) {
                        pmd_t *pmd;
@@ -359,7 +364,7 @@ static int pgd_walk(pgd_t *pgd_base, int (*func)(struct page *, unsigned),
                        pmd = pmd_offset(pud, 0);
 
                        if (PTRS_PER_PMD > 1) /* not folded */
-                               flush |= (*func)(virt_to_page(pmd), 0);
+                               flush |= (*func)(virt_to_page(pmd), PT_PMD);
 
                        for (; addr != pmd_limit; pmd++) {
                                addr += (PAGE_SIZE * PTRS_PER_PTE);
@@ -371,17 +376,47 @@ static int pgd_walk(pgd_t *pgd_base, int (*func)(struct page *, unsigned),
                                if (pmd_none(*pmd))
                                        continue;
 
-                               flush |= (*func)(pmd_page(*pmd), 0);
+                               flush |= (*func)(pmd_page(*pmd), PT_PTE);
                        }
                }
        }
 
-       flush |= (*func)(virt_to_page(pgd_base), UVMF_TLB_FLUSH);
+       flush |= (*func)(virt_to_page(pgd_base), PT_PGD);
 
        return flush;
 }
 
-static int pin_page(struct page *page, unsigned flags)
+static spinlock_t *lock_pte(struct page *page)
+{
+       spinlock_t *ptl = NULL;
+
+#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+       ptl = __pte_lockptr(page);
+       spin_lock(ptl);
+#endif
+
+       return ptl;
+}
+
+static void do_unlock(void *v)
+{
+       spinlock_t *ptl = v;
+       spin_unlock(ptl);
+}
+
+static void xen_do_pin(unsigned level, unsigned long pfn)
+{
+       struct mmuext_op *op;
+       struct multicall_space mcs;
+
+       mcs = __xen_mc_entry(sizeof(*op));
+       op = mcs.args;
+       op->cmd = level;
+       op->arg1.mfn = pfn_to_mfn(pfn);
+       MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
+}
+
+static int pin_page(struct page *page, enum pt_level level)
 {
        unsigned pgfl = test_and_set_bit(PG_pinned, &page->flags);
        int flush;
@@ -396,12 +431,26 @@ static int pin_page(struct page *page, unsigned flags)
                void *pt = lowmem_page_address(page);
                unsigned long pfn = page_to_pfn(page);
                struct multicall_space mcs = __xen_mc_entry(0);
+               spinlock_t *ptl;
 
                flush = 0;
 
+               ptl = NULL;
+               if (level == PT_PTE)
+                       ptl = lock_pte(page);
+
                MULTI_update_va_mapping(mcs.mc, (unsigned long)pt,
                                        pfn_pte(pfn, PAGE_KERNEL_RO),
-                                       flags);
+                                       level == PT_PGD ? UVMF_TLB_FLUSH : 0);
+
+               if (level == PT_PTE)
+                       xen_do_pin(MMUEXT_PIN_L1_TABLE, pfn);
+
+               if (ptl) {
+                       /* Queue a deferred unlock for when this batch
+                          is completed. */
+                       xen_mc_callback(do_unlock, ptl);
+               }
        }
 
        return flush;
@@ -412,8 +461,7 @@ static int pin_page(struct page *page, unsigned flags)
    read-only, and can be pinned. */
 void xen_pgd_pin(pgd_t *pgd)
 {
-       struct multicall_space mcs;
-       struct mmuext_op *op;
+       unsigned level;
 
        xen_mc_batch();
 
@@ -424,16 +472,13 @@ void xen_pgd_pin(pgd_t *pgd)
                xen_mc_batch();
        }
 
-       mcs = __xen_mc_entry(sizeof(*op));
-       op = mcs.args;
-
 #ifdef CONFIG_X86_PAE
-       op->cmd = MMUEXT_PIN_L3_TABLE;
+       level = MMUEXT_PIN_L3_TABLE;
 #else
-       op->cmd = MMUEXT_PIN_L2_TABLE;
+       level = MMUEXT_PIN_L2_TABLE;
 #endif
-       op->arg1.mfn = pfn_to_mfn(PFN_DOWN(__pa(pgd)));
-       MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
+
+       xen_do_pin(level, PFN_DOWN(__pa(pgd)));
 
        xen_mc_issue(0);
 }
@@ -441,7 +486,7 @@ void xen_pgd_pin(pgd_t *pgd)
 /* The init_mm pagetable is really pinned as soon as its created, but
    that's before we have page structures to store the bits.  So do all
    the book-keeping now. */
-static __init int mark_pinned(struct page *page, unsigned flags)
+static __init int mark_pinned(struct page *page, enum pt_level level)
 {
        SetPagePinned(page);
        return 0;
@@ -452,18 +497,32 @@ void __init xen_mark_init_mm_pinned(void)
        pgd_walk(init_mm.pgd, mark_pinned, FIXADDR_TOP);
 }
 
-static int unpin_page(struct page *page, unsigned flags)
+static int unpin_page(struct page *page, enum pt_level level)
 {
        unsigned pgfl = test_and_clear_bit(PG_pinned, &page->flags);
 
        if (pgfl && !PageHighMem(page)) {
                void *pt = lowmem_page_address(page);
                unsigned long pfn = page_to_pfn(page);
-               struct multicall_space mcs = __xen_mc_entry(0);
+               spinlock_t *ptl = NULL;
+               struct multicall_space mcs;
+
+               if (level == PT_PTE) {
+                       ptl = lock_pte(page);
+
+                       xen_do_pin(MMUEXT_UNPIN_TABLE, pfn);
+               }
+
+               mcs = __xen_mc_entry(0);
 
                MULTI_update_va_mapping(mcs.mc, (unsigned long)pt,
                                        pfn_pte(pfn, PAGE_KERNEL),
-                                       flags);
+                                       level == PT_PGD ? UVMF_TLB_FLUSH : 0);
+
+               if (ptl) {
+                       /* unlock when batch completed */
+                       xen_mc_callback(do_unlock, ptl);
+               }
        }
 
        return 0;               /* never need to flush on unpin */
@@ -472,18 +531,9 @@ static int unpin_page(struct page *page, unsigned flags)
 /* Release a pagetables pages back as normal RW */
 static void xen_pgd_unpin(pgd_t *pgd)
 {
-       struct mmuext_op *op;
-       struct multicall_space mcs;
-
        xen_mc_batch();
 
-       mcs = __xen_mc_entry(sizeof(*op));
-
-       op = mcs.args;
-       op->cmd = MMUEXT_UNPIN_TABLE;
-       op->arg1.mfn = pfn_to_mfn(PFN_DOWN(__pa(pgd)));
-
-       MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
+       xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
 
        pgd_walk(pgd, unpin_page, TASK_SIZE);
 
@@ -514,20 +564,43 @@ static void drop_other_mm_ref(void *info)
 
        if (__get_cpu_var(cpu_tlbstate).active_mm == mm)
                leave_mm(smp_processor_id());
+
+       /* If this cpu still has a stale cr3 reference, then make sure
+          it has been flushed. */
+       if (x86_read_percpu(xen_current_cr3) == __pa(mm->pgd)) {
+               load_cr3(swapper_pg_dir);
+               arch_flush_lazy_cpu_mode();
+       }
 }
 
 static void drop_mm_ref(struct mm_struct *mm)
 {
+       cpumask_t mask;
+       unsigned cpu;
+
        if (current->active_mm == mm) {
                if (current->mm == mm)
                        load_cr3(swapper_pg_dir);
                else
                        leave_mm(smp_processor_id());
+               arch_flush_lazy_cpu_mode();
        }
 
-       if (!cpus_empty(mm->cpu_vm_mask))
-               xen_smp_call_function_mask(mm->cpu_vm_mask, drop_other_mm_ref,
-                                          mm, 1);
+       /* Get the "official" set of cpus referring to our pagetable. */
+       mask = mm->cpu_vm_mask;
+
+       /* It's possible that a vcpu may have a stale reference to our
+          cr3, because it's in lazy mode, and it hasn't yet flushed
+          its set of pending hypercalls.  In this case, we can
+          look at its actual current cr3 value, and force it to flush
+          if needed. */
+       for_each_online_cpu(cpu) {
+               if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd))
+                       cpu_set(cpu, mask);
+       }
+
+       if (!cpus_empty(mask))
+               xen_smp_call_function_mask(mask, drop_other_mm_ref, mm, 1);
 }
 #else
 static void drop_mm_ref(struct mm_struct *mm)
@@ -562,5 +635,6 @@ void xen_exit_mmap(struct mm_struct *mm)
        /* pgd may not be pinned in the error exit path of execve */
        if (PagePinned(virt_to_page(mm->pgd)))
                xen_pgd_unpin(mm->pgd);
+
        spin_unlock(&mm->page_table_lock);
 }
index c837e8e..5e6f36f 100644 (file)
 
 #include "multicalls.h"
 
+#define MC_DEBUG       1
+
 #define MC_BATCH       32
 #define MC_ARGS                (MC_BATCH * 16 / sizeof(u64))
 
 struct mc_buffer {
        struct multicall_entry entries[MC_BATCH];
+#if MC_DEBUG
+       struct multicall_entry debug[MC_BATCH];
+#endif
        u64 args[MC_ARGS];
-       unsigned mcidx, argidx;
+       struct callback {
+               void (*fn)(void *);
+               void *data;
+       } callbacks[MC_BATCH];
+       unsigned mcidx, argidx, cbidx;
 };
 
 static DEFINE_PER_CPU(struct mc_buffer, mc_buffer);
@@ -43,6 +52,7 @@ void xen_mc_flush(void)
        struct mc_buffer *b = &__get_cpu_var(mc_buffer);
        int ret = 0;
        unsigned long flags;
+       int i;
 
        BUG_ON(preemptible());
 
@@ -51,13 +61,31 @@ void xen_mc_flush(void)
        local_irq_save(flags);
 
        if (b->mcidx) {
-               int i;
+#if MC_DEBUG
+               memcpy(b->debug, b->entries,
+                      b->mcidx * sizeof(struct multicall_entry));
+#endif
 
                if (HYPERVISOR_multicall(b->entries, b->mcidx) != 0)
                        BUG();
                for (i = 0; i < b->mcidx; i++)
                        if (b->entries[i].result < 0)
                                ret++;
+
+#if MC_DEBUG
+               if (ret) {
+                       printk(KERN_ERR "%d multicall(s) failed: cpu %d\n",
+                              ret, smp_processor_id());
+                       for(i = 0; i < b->mcidx; i++) {
+                               printk("  call %2d/%d: op=%lu arg=[%lx] result=%ld\n",
+                                      i+1, b->mcidx,
+                                      b->debug[i].op,
+                                      b->debug[i].args[0],
+                                      b->entries[i].result);
+                       }
+               }
+#endif
+
                b->mcidx = 0;
                b->argidx = 0;
        } else
@@ -65,6 +93,13 @@ void xen_mc_flush(void)
 
        local_irq_restore(flags);
 
+       for(i = 0; i < b->cbidx; i++) {
+               struct callback *cb = &b->callbacks[i];
+
+               (*cb->fn)(cb->data);
+       }
+       b->cbidx = 0;
+
        BUG_ON(ret);
 }
 
@@ -88,3 +123,16 @@ struct multicall_space __xen_mc_entry(size_t args)
 
        return ret;
 }
+
+void xen_mc_callback(void (*fn)(void *), void *data)
+{
+       struct mc_buffer *b = &__get_cpu_var(mc_buffer);
+       struct callback *cb;
+
+       if (b->cbidx == MC_BATCH)
+               xen_mc_flush();
+
+       cb = &b->callbacks[b->cbidx++];
+       cb->fn = fn;
+       cb->data = data;
+}
index e6f7530..8bae996 100644 (file)
@@ -35,11 +35,14 @@ void xen_mc_flush(void);
 /* Issue a multicall if we're not in a lazy mode */
 static inline void xen_mc_issue(unsigned mode)
 {
-       if ((xen_get_lazy_mode() & mode) == 0)
+       if ((paravirt_get_lazy_mode() & mode) == 0)
                xen_mc_flush();
 
        /* restore flags saved in xen_mc_batch */
        local_irq_restore(x86_read_percpu(xen_mc_irq_flags));
 }
 
+/* Set up a callback to be called when the current batch is flushed */
+void xen_mc_callback(void (*fn)(void *), void *data);
+
 #endif /* _XEN_MULTICALLS_H */
index 6c05858..c1b131b 100644 (file)
@@ -371,7 +371,8 @@ int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *),
                               void *info, int wait)
 {
        struct call_data_struct data;
-       int cpus;
+       int cpus, cpu;
+       bool yield;
 
        /* Holding any lock stops cpus from going down. */
        spin_lock(&call_lock);
@@ -400,9 +401,14 @@ int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *),
        /* Send a message to other CPUs and wait for them to respond */
        xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);
 
-       /* Make sure other vcpus get a chance to run.
-          XXX too severe?  Maybe we should check the other CPU's states? */
-       HYPERVISOR_sched_op(SCHEDOP_yield, 0);
+       /* Make sure other vcpus get a chance to run if they need to. */
+       yield = false;
+       for_each_cpu_mask(cpu, mask)
+               if (xen_vcpu_stolen(cpu))
+                       yield = true;
+
+       if (yield)
+               HYPERVISOR_sched_op(SCHEDOP_yield, 0);
 
        /* Wait for response */
        while (atomic_read(&data.started) != cpus ||
index dfd6db6..d083ff5 100644 (file)
@@ -105,6 +105,12 @@ static void get_runstate_snapshot(struct vcpu_runstate_info *res)
        } while (get64(&state->state_entry_time) != state_time);
 }
 
+/* true if vcpu's runstate is RUNSTATE_runnable: could run, but has no real cpu */
+bool xen_vcpu_stolen(int vcpu)
+{
+       return per_cpu(runstate, vcpu).state == RUNSTATE_runnable;
+}
+
 static void setup_runstate_info(int cpu)
 {
        struct vcpu_register_runstate_memory_area area;
index b9aaea4..b02a909 100644 (file)
@@ -11,6 +11,7 @@ void xen_copy_trap_info(struct trap_info *traps);
 
 DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu);
 DECLARE_PER_CPU(unsigned long, xen_cr3);
+DECLARE_PER_CPU(unsigned long, xen_current_cr3);
 
 extern struct start_info *xen_start_info;
 extern struct shared_info *HYPERVISOR_shared_info;
@@ -27,14 +28,9 @@ unsigned long xen_get_wallclock(void);
 int xen_set_wallclock(unsigned long time);
 unsigned long long xen_sched_clock(void);
 
-void xen_mark_init_mm_pinned(void);
-
-DECLARE_PER_CPU(enum paravirt_lazy_mode, xen_lazy_mode);
+bool xen_vcpu_stolen(int vcpu);
 
-static inline unsigned xen_get_lazy_mode(void)
-{
-       return x86_read_percpu(xen_lazy_mode);
-}
+void xen_mark_init_mm_pinned(void);
 
 void __init xen_fill_possible_map(void);
 
index 9e3f3cc..3935469 100644 (file)
@@ -1322,8 +1322,8 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq,
                  struct scatterlist *sglist)
 {
        struct bio_vec *bvec, *bvprv;
-       struct scatterlist *next_sg, *sg;
        struct req_iterator iter;
+       struct scatterlist *sg;
        int nsegs, cluster;
 
        nsegs = 0;
@@ -1333,7 +1333,7 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq,
         * for each bio in rq
         */
        bvprv = NULL;
-       sg = next_sg = &sglist[0];
+       sg = NULL;
        rq_for_each_segment(bvec, rq, iter) {
                int nbytes = bvec->bv_len;
 
@@ -1349,8 +1349,10 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq,
                        sg->length += nbytes;
                } else {
 new_segment:
-                       sg = next_sg;
-                       next_sg = sg_next(sg);
+                       if (!sg)
+                               sg = sglist;
+                       else
+                               sg = sg_next(sg);
 
                        memset(sg, 0, sizeof(*sg));
                        sg->page = bvec->bv_page;
index 3d6bd0b..efccb21 100644 (file)
@@ -115,7 +115,7 @@ static struct hv_ops lguest_cons = {
  * (0), and the struct hv_ops containing the put_chars() function. */
 static int __init cons_init(void)
 {
-       if (strcmp(paravirt_ops.name, "lguest") != 0)
+       if (strcmp(pv_info.name, "lguest") != 0)
                return 0;
 
        return hvc_instantiate(0, 0, &lguest_cons);
index 4a315f0..a0788c1 100644 (file)
@@ -248,8 +248,8 @@ static void unmap_switcher(void)
 }
 
 /*H:130 Our Guest is usually so well behaved; it never tries to do things it
- * isn't allowed to.  Unfortunately, "struct paravirt_ops" isn't quite
- * complete, because it doesn't contain replacements for the Intel I/O
+ * isn't allowed to.  Unfortunately, Linux's paravirtual infrastructure isn't
+ * quite complete, because it doesn't contain replacements for the Intel I/O
  * instructions.  As a result, the Guest sometimes fumbles across one during
  * the boot process as it probes for various things which are usually attached
  * to a PC.
@@ -694,7 +694,7 @@ static int __init init(void)
 
        /* Lguest can't run under Xen, VMI or itself.  It does Tricky Stuff. */
        if (paravirt_enabled()) {
-               printk("lguest is afraid of %s\n", paravirt_ops.name);
+               printk("lguest is afraid of %s\n", pv_info.name);
                return -EPERM;
        }
 
index 4a579c8..3ba337d 100644 (file)
@@ -23,7 +23,7 @@
  *
  * So how does the kernel know it's a Guest?  The Guest starts at a special
  * entry point marked with a magic string, which sets up a few things then
- * calls here.  We replace the native functions in "struct paravirt_ops"
+ * calls here.  We replace the native functions in various "paravirt" structures
  * with our Guest versions, then boot like normal. :*/
 
 /*
@@ -97,29 +97,17 @@ static cycle_t clock_base;
  * them as a batch when lazy_mode is eventually turned off.  Because hypercalls
  * are reasonably expensive, batching them up makes sense.  For example, a
  * large mmap might update dozens of page table entries: that code calls
- * lguest_lazy_mode(PARAVIRT_LAZY_MMU), does the dozen updates, then calls
- * lguest_lazy_mode(PARAVIRT_LAZY_NONE).
+ * paravirt_enter_lazy_mmu(), does the dozen updates, then calls
+ * lguest_leave_lazy_mode().
  *
  * So, when we're in lazy mode, we call async_hypercall() to store the call for
  * future processing.  When lazy mode is turned off we issue a hypercall to
  * flush the stored calls.
- *
- * There's also a hack where "mode" is set to "PARAVIRT_LAZY_FLUSH" which
- * indicates we're to flush any outstanding calls immediately.  This is used
- * when an interrupt handler does a kmap_atomic(): the page table changes must
- * happen immediately even if we're in the middle of a batch.  Usually we're
- * not, though, so there's nothing to do. */
-static enum paravirt_lazy_mode lazy_mode; /* Note: not SMP-safe! */
-static void lguest_lazy_mode(enum paravirt_lazy_mode mode)
+ */
+static void lguest_leave_lazy_mode(void)
 {
-       if (mode == PARAVIRT_LAZY_FLUSH) {
-               if (unlikely(lazy_mode != PARAVIRT_LAZY_NONE))
-                       hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0);
-       } else {
-               lazy_mode = mode;
-               if (mode == PARAVIRT_LAZY_NONE)
-                       hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0);
-       }
+       paravirt_leave_lazy(paravirt_get_lazy_mode());
+       hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0);
 }
 
 static void lazy_hcall(unsigned long call,
@@ -127,7 +115,7 @@ static void lazy_hcall(unsigned long call,
                       unsigned long arg2,
                       unsigned long arg3)
 {
-       if (lazy_mode == PARAVIRT_LAZY_NONE)
+       if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE)
                hcall(call, arg1, arg2, arg3);
        else
                async_hcall(call, arg1, arg2, arg3);
@@ -331,7 +319,7 @@ static void lguest_load_tls(struct thread_struct *t, unsigned int cpu)
 }
 
 /*G:038 That's enough excitement for now, back to ploughing through each of
- * the paravirt_ops (we're about 1/3 of the way through).
+ * the different pv_ops structures (we're about 1/3 of the way through).
  *
  * This is the Local Descriptor Table, another weird Intel thingy.  Linux only
  * uses this for some strange applications like Wine.  We don't do anything
@@ -558,7 +546,7 @@ static void lguest_set_pte(pte_t *ptep, pte_t pteval)
                lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0);
 }
 
-/* Unfortunately for Lguest, the paravirt_ops for page tables were based on
+/* Unfortunately for Lguest, the pv_mmu_ops for page tables were based on
  * native page table operations.  On native hardware you can set a new page
  * table entry whenever you want, but if you want to remove one you have to do
  * a TLB flush (a TLB is a little cache of page table entries kept by the CPU).
@@ -782,7 +770,7 @@ static void lguest_time_init(void)
        clocksource_register(&lguest_clock);
 
        /* Now we've set up our clock, we can use it as the scheduler clock */
-       paravirt_ops.sched_clock = lguest_sched_clock;
+       pv_time_ops.sched_clock = lguest_sched_clock;
 
        /* We can't set cpumask in the initializer: damn C limitations!  Set it
         * here and register our timer device. */
@@ -904,7 +892,7 @@ static __init char *lguest_memory_setup(void)
 /*G:050
  * Patching (Powerfully Placating Performance Pedants)
  *
- * We have already seen that "struct paravirt_ops" lets us replace simple
+ * We have already seen that pv_ops structures let us replace simple
  * native instructions with calls to the appropriate back end all throughout
  * the kernel.  This allows the same kernel to run as a Guest and as a native
  * kernel, but it's slow because of all the indirect branches.
@@ -929,10 +917,10 @@ static const struct lguest_insns
 {
        const char *start, *end;
 } lguest_insns[] = {
-       [PARAVIRT_PATCH(irq_disable)] = { lgstart_cli, lgend_cli },
-       [PARAVIRT_PATCH(irq_enable)] = { lgstart_sti, lgend_sti },
-       [PARAVIRT_PATCH(restore_fl)] = { lgstart_popf, lgend_popf },
-       [PARAVIRT_PATCH(save_fl)] = { lgstart_pushf, lgend_pushf },
+       [PARAVIRT_PATCH(pv_irq_ops.irq_disable)] = { lgstart_cli, lgend_cli },
+       [PARAVIRT_PATCH(pv_irq_ops.irq_enable)] = { lgstart_sti, lgend_sti },
+       [PARAVIRT_PATCH(pv_irq_ops.restore_fl)] = { lgstart_popf, lgend_popf },
+       [PARAVIRT_PATCH(pv_irq_ops.save_fl)] = { lgstart_pushf, lgend_pushf },
 };
 
 /* Now our patch routine is fairly simple (based on the native one in
@@ -959,9 +947,9 @@ static unsigned lguest_patch(u8 type, u16 clobber, void *ibuf,
        return insn_len;
 }
 
-/*G:030 Once we get to lguest_init(), we know we're a Guest.  The paravirt_ops
- * structure in the kernel provides a single point for (almost) every routine
- * we have to override to avoid privileged instructions. */
+/*G:030 Once we get to lguest_init(), we know we're a Guest.  The pv_ops
+ * structures in the kernel provide points for (almost) every routine we have
+ * to override to avoid privileged instructions. */
 __init void lguest_init(void *boot)
 {
        /* Copy boot parameters first: the Launcher put the physical location
@@ -976,54 +964,70 @@ __init void lguest_init(void *boot)
 
        /* We're under lguest, paravirt is enabled, and we're running at
         * privilege level 1, not 0 as normal. */
-       paravirt_ops.name = "lguest";
-       paravirt_ops.paravirt_enabled = 1;
-       paravirt_ops.kernel_rpl = 1;
+       pv_info.name = "lguest";
+       pv_info.paravirt_enabled = 1;
+       pv_info.kernel_rpl = 1;
 
        /* We set up all the lguest overrides for sensitive operations.  These
         * are detailed with the operations themselves. */
-       paravirt_ops.save_fl = save_fl;
-       paravirt_ops.restore_fl = restore_fl;
-       paravirt_ops.irq_disable = irq_disable;
-       paravirt_ops.irq_enable = irq_enable;
-       paravirt_ops.load_gdt = lguest_load_gdt;
-       paravirt_ops.memory_setup = lguest_memory_setup;
-       paravirt_ops.cpuid = lguest_cpuid;
-       paravirt_ops.write_cr3 = lguest_write_cr3;
-       paravirt_ops.flush_tlb_user = lguest_flush_tlb_user;
-       paravirt_ops.flush_tlb_single = lguest_flush_tlb_single;
-       paravirt_ops.flush_tlb_kernel = lguest_flush_tlb_kernel;
-       paravirt_ops.set_pte = lguest_set_pte;
-       paravirt_ops.set_pte_at = lguest_set_pte_at;
-       paravirt_ops.set_pmd = lguest_set_pmd;
+
+       /* interrupt-related operations */
+       pv_irq_ops.init_IRQ = lguest_init_IRQ;
+       pv_irq_ops.save_fl = save_fl;
+       pv_irq_ops.restore_fl = restore_fl;
+       pv_irq_ops.irq_disable = irq_disable;
+       pv_irq_ops.irq_enable = irq_enable;
+       pv_irq_ops.safe_halt = lguest_safe_halt;
+
+       /* init-time operations */
+       pv_init_ops.memory_setup = lguest_memory_setup;
+       pv_init_ops.patch = lguest_patch;
+
+       /* Intercepts of various cpu instructions */
+       pv_cpu_ops.load_gdt = lguest_load_gdt;
+       pv_cpu_ops.cpuid = lguest_cpuid;
+       pv_cpu_ops.load_idt = lguest_load_idt;
+       pv_cpu_ops.iret = lguest_iret;
+       pv_cpu_ops.load_esp0 = lguest_load_esp0;
+       pv_cpu_ops.load_tr_desc = lguest_load_tr_desc;
+       pv_cpu_ops.set_ldt = lguest_set_ldt;
+       pv_cpu_ops.load_tls = lguest_load_tls;
+       pv_cpu_ops.set_debugreg = lguest_set_debugreg;
+       pv_cpu_ops.clts = lguest_clts;
+       pv_cpu_ops.read_cr0 = lguest_read_cr0;
+       pv_cpu_ops.write_cr0 = lguest_write_cr0;
+       pv_cpu_ops.read_cr4 = lguest_read_cr4;
+       pv_cpu_ops.write_cr4 = lguest_write_cr4;
+       pv_cpu_ops.write_gdt_entry = lguest_write_gdt_entry;
+       pv_cpu_ops.write_idt_entry = lguest_write_idt_entry;
+       pv_cpu_ops.wbinvd = lguest_wbinvd;
+       pv_cpu_ops.lazy_mode.enter = paravirt_enter_lazy_cpu;
+       pv_cpu_ops.lazy_mode.leave = lguest_leave_lazy_mode;
+
+       /* pagetable management */
+       pv_mmu_ops.write_cr3 = lguest_write_cr3;
+       pv_mmu_ops.flush_tlb_user = lguest_flush_tlb_user;
+       pv_mmu_ops.flush_tlb_single = lguest_flush_tlb_single;
+       pv_mmu_ops.flush_tlb_kernel = lguest_flush_tlb_kernel;
+       pv_mmu_ops.set_pte = lguest_set_pte;
+       pv_mmu_ops.set_pte_at = lguest_set_pte_at;
+       pv_mmu_ops.set_pmd = lguest_set_pmd;
+       pv_mmu_ops.read_cr2 = lguest_read_cr2;
+       pv_mmu_ops.read_cr3 = lguest_read_cr3;
+       pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu;
+       pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mode;
+
 #ifdef CONFIG_X86_LOCAL_APIC
-       paravirt_ops.apic_write = lguest_apic_write;
-       paravirt_ops.apic_write_atomic = lguest_apic_write;
-       paravirt_ops.apic_read = lguest_apic_read;
+       /* apic read/write intercepts */
+       pv_apic_ops.apic_write = lguest_apic_write;
+       pv_apic_ops.apic_write_atomic = lguest_apic_write;
+       pv_apic_ops.apic_read = lguest_apic_read;
 #endif
-       paravirt_ops.load_idt = lguest_load_idt;
-       paravirt_ops.iret = lguest_iret;
-       paravirt_ops.load_esp0 = lguest_load_esp0;
-       paravirt_ops.load_tr_desc = lguest_load_tr_desc;
-       paravirt_ops.set_ldt = lguest_set_ldt;
-       paravirt_ops.load_tls = lguest_load_tls;
-       paravirt_ops.set_debugreg = lguest_set_debugreg;
-       paravirt_ops.clts = lguest_clts;
-       paravirt_ops.read_cr0 = lguest_read_cr0;
-       paravirt_ops.write_cr0 = lguest_write_cr0;
-       paravirt_ops.init_IRQ = lguest_init_IRQ;
-       paravirt_ops.read_cr2 = lguest_read_cr2;
-       paravirt_ops.read_cr3 = lguest_read_cr3;
-       paravirt_ops.read_cr4 = lguest_read_cr4;
-       paravirt_ops.write_cr4 = lguest_write_cr4;
-       paravirt_ops.write_gdt_entry = lguest_write_gdt_entry;
-       paravirt_ops.write_idt_entry = lguest_write_idt_entry;
-       paravirt_ops.patch = lguest_patch;
-       paravirt_ops.safe_halt = lguest_safe_halt;
-       paravirt_ops.get_wallclock = lguest_get_wallclock;
-       paravirt_ops.time_init = lguest_time_init;
-       paravirt_ops.set_lazy_mode = lguest_lazy_mode;
-       paravirt_ops.wbinvd = lguest_wbinvd;
+
+       /* time operations */
+       pv_time_ops.get_wallclock = lguest_get_wallclock;
+       pv_time_ops.time_init = lguest_time_init;
+
        /* Now is a good time to look at the implementations of these functions
         * before returning to the rest of lguest_init(). */
 
index 9e7752c..5732978 100644 (file)
@@ -201,7 +201,7 @@ static void scan_devices(void)
  * "struct lguest_device_desc" array. */
 static int __init lguest_bus_init(void)
 {
-       if (strcmp(paravirt_ops.name, "lguest") != 0)
+       if (strcmp(pv_info.name, "lguest") != 0)
                return 0;
 
        /* Devices are in a single page above top of "normal" mem */
index 0c86be7..aac8a02 100644 (file)
@@ -764,6 +764,8 @@ struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, gfp_t gfp_mask)
                if (unlikely(!sgl))
                        goto enomem;
 
+               memset(sgl, 0, sizeof(*sgl) * sgp->size);
+
                /*
                 * first loop through, set initial index and return value
                 */
index b37baf8..3bdce91 100644 (file)
@@ -40,7 +40,9 @@
 
 #include <linux/delay.h>
 #include <linux/types.h>
+
 #include <asm/io.h>
+#include <asm/irq.h>
 
 /* cpu pipeline flush */
 void static inline au_sync(void)
@@ -523,63 +525,67 @@ extern struct au1xxx_irqmap au1xxx_irq_map[];
 /* Interrupt Numbers */
 /* Au1000 */
 #ifdef CONFIG_SOC_AU1000
-#define AU1000_UART0_INT          0
-#define AU1000_UART1_INT          1 /* au1000 */
-#define AU1000_UART2_INT          2 /* au1000 */
-#define AU1000_UART3_INT          3
-#define AU1000_SSI0_INT           4 /* au1000 */
-#define AU1000_SSI1_INT           5 /* au1000 */
-#define AU1000_DMA_INT_BASE       6
-#define AU1000_TOY_INT            14
-#define AU1000_TOY_MATCH0_INT     15
-#define AU1000_TOY_MATCH1_INT     16
-#define AU1000_TOY_MATCH2_INT     17
-#define AU1000_RTC_INT            18
-#define AU1000_RTC_MATCH0_INT     19
-#define AU1000_RTC_MATCH1_INT     20
-#define AU1000_RTC_MATCH2_INT     21
-#define AU1000_IRDA_TX_INT        22 /* au1000 */
-#define AU1000_IRDA_RX_INT        23 /* au1000 */
-#define AU1000_USB_DEV_REQ_INT    24
-#define AU1000_USB_DEV_SUS_INT    25
-#define AU1000_USB_HOST_INT       26
-#define AU1000_ACSYNC_INT         27
-#define AU1000_MAC0_DMA_INT       28
-#define AU1000_MAC1_DMA_INT       29
-#define AU1000_I2S_UO_INT         30 /* au1000 */
-#define AU1000_AC97C_INT          31
-#define AU1000_GPIO_0             32
-#define AU1000_GPIO_1             33
-#define AU1000_GPIO_2             34
-#define AU1000_GPIO_3             35
-#define AU1000_GPIO_4             36
-#define AU1000_GPIO_5             37
-#define AU1000_GPIO_6             38
-#define AU1000_GPIO_7             39
-#define AU1000_GPIO_8             40
-#define AU1000_GPIO_9             41
-#define AU1000_GPIO_10            42
-#define AU1000_GPIO_11            43
-#define AU1000_GPIO_12            44
-#define AU1000_GPIO_13            45
-#define AU1000_GPIO_14            46
-#define AU1000_GPIO_15            47
-#define AU1000_GPIO_16            48
-#define AU1000_GPIO_17            49
-#define AU1000_GPIO_18            50
-#define AU1000_GPIO_19            51
-#define AU1000_GPIO_20            52
-#define AU1000_GPIO_21            53
-#define AU1000_GPIO_22            54
-#define AU1000_GPIO_23            55
-#define AU1000_GPIO_24            56
-#define AU1000_GPIO_25            57
-#define AU1000_GPIO_26            58
-#define AU1000_GPIO_27            59
-#define AU1000_GPIO_28            60
-#define AU1000_GPIO_29            61
-#define AU1000_GPIO_30            62
-#define AU1000_GPIO_31            63
+enum soc_au1000_ints {
+       AU1000_FIRST_INT        = MIPS_CPU_IRQ_BASE,    /* numbering starts at the CPU IRQ base */
+       AU1000_UART0_INT        = AU1000_FIRST_INT,
+       AU1000_UART1_INT,                               /* au1000 */
+       AU1000_UART2_INT,                               /* au1000 */
+       AU1000_UART3_INT,
+       AU1000_SSI0_INT,                                /* au1000 */
+       AU1000_SSI1_INT,                                /* au1000 */
+       AU1000_DMA_INT_BASE,                            /* FIRST_INT + 6 */
+       /* offsets 7-13 get no enumerator of their own */
+       AU1000_TOY_INT          = AU1000_FIRST_INT + 14,
+       AU1000_TOY_MATCH0_INT,
+       AU1000_TOY_MATCH1_INT,
+       AU1000_TOY_MATCH2_INT,
+       AU1000_RTC_INT,
+       AU1000_RTC_MATCH0_INT,
+       AU1000_RTC_MATCH1_INT,
+       AU1000_RTC_MATCH2_INT,
+       AU1000_IRDA_TX_INT,                             /* au1000 */
+       AU1000_IRDA_RX_INT,                             /* au1000 */
+       AU1000_USB_DEV_REQ_INT,
+       AU1000_USB_DEV_SUS_INT,
+       AU1000_USB_HOST_INT,
+       AU1000_ACSYNC_INT,
+       AU1000_MAC0_DMA_INT,
+       AU1000_MAC1_DMA_INT,
+       AU1000_I2S_UO_INT,                              /* au1000 */
+       AU1000_AC97C_INT,
+       AU1000_GPIO_0,                                  /* FIRST_INT + 32 */
+       AU1000_GPIO_1,
+       AU1000_GPIO_2,
+       AU1000_GPIO_3,
+       AU1000_GPIO_4,
+       AU1000_GPIO_5,
+       AU1000_GPIO_6,
+       AU1000_GPIO_7,
+       AU1000_GPIO_8,
+       AU1000_GPIO_9,
+       AU1000_GPIO_10,
+       AU1000_GPIO_11,
+       AU1000_GPIO_12,
+       AU1000_GPIO_13,
+       AU1000_GPIO_14,
+       AU1000_GPIO_15,
+       AU1000_GPIO_16,
+       AU1000_GPIO_17,
+       AU1000_GPIO_18,
+       AU1000_GPIO_19,
+       AU1000_GPIO_20,
+       AU1000_GPIO_21,
+       AU1000_GPIO_22,
+       AU1000_GPIO_23,
+       AU1000_GPIO_24,
+       AU1000_GPIO_25,
+       AU1000_GPIO_26,
+       AU1000_GPIO_27,
+       AU1000_GPIO_28,
+       AU1000_GPIO_29,
+       AU1000_GPIO_30,
+       AU1000_GPIO_31,                                 /* FIRST_INT + 63 */
+};
 
 #define UART0_ADDR                0xB1100000
 #define UART1_ADDR                0xB1200000
@@ -598,61 +604,65 @@ extern struct au1xxx_irqmap au1xxx_irq_map[];
 
 /* Au1500 */
 #ifdef CONFIG_SOC_AU1500
-#define AU1500_UART0_INT          0
-#define AU1000_PCI_INTA           1 /* au1500 */
-#define AU1000_PCI_INTB           2 /* au1500 */
-#define AU1500_UART3_INT          3
-#define AU1000_PCI_INTC           4 /* au1500 */
-#define AU1000_PCI_INTD           5 /* au1500 */
-#define AU1000_DMA_INT_BASE       6
-#define AU1000_TOY_INT            14
-#define AU1000_TOY_MATCH0_INT     15
-#define AU1000_TOY_MATCH1_INT     16
-#define AU1000_TOY_MATCH2_INT     17
-#define AU1000_RTC_INT            18
-#define AU1000_RTC_MATCH0_INT     19
-#define AU1000_RTC_MATCH1_INT     20
-#define AU1000_RTC_MATCH2_INT     21
-#define AU1500_PCI_ERR_INT        22
-#define AU1000_USB_DEV_REQ_INT    24
-#define AU1000_USB_DEV_SUS_INT    25
-#define AU1000_USB_HOST_INT       26
-#define AU1000_ACSYNC_INT         27
-#define AU1500_MAC0_DMA_INT       28
-#define AU1500_MAC1_DMA_INT       29
-#define AU1000_AC97C_INT          31
-#define AU1000_GPIO_0             32
-#define AU1000_GPIO_1             33
-#define AU1000_GPIO_2             34
-#define AU1000_GPIO_3             35
-#define AU1000_GPIO_4             36
-#define AU1000_GPIO_5             37
-#define AU1000_GPIO_6             38
-#define AU1000_GPIO_7             39
-#define AU1000_GPIO_8             40
-#define AU1000_GPIO_9             41
-#define AU1000_GPIO_10            42
-#define AU1000_GPIO_11            43
-#define AU1000_GPIO_12            44
-#define AU1000_GPIO_13            45
-#define AU1000_GPIO_14            46
-#define AU1000_GPIO_15            47
-#define AU1500_GPIO_200           48
-#define AU1500_GPIO_201           49
-#define AU1500_GPIO_202           50
-#define AU1500_GPIO_203           51
-#define AU1500_GPIO_20            52
-#define AU1500_GPIO_204           53
-#define AU1500_GPIO_205           54
-#define AU1500_GPIO_23            55
-#define AU1500_GPIO_24            56
-#define AU1500_GPIO_25            57
-#define AU1500_GPIO_26            58
-#define AU1500_GPIO_27            59
-#define AU1500_GPIO_28            60
-#define AU1500_GPIO_206           61
-#define AU1500_GPIO_207           62
-#define AU1500_GPIO_208_215       63
+enum soc_au1500_ints {
+       AU1500_FIRST_INT        = MIPS_CPU_IRQ_BASE,    /* numbering starts at the CPU IRQ base */
+       AU1500_UART0_INT        = AU1500_FIRST_INT,
+       AU1000_PCI_INTA,                                /* au1500 */
+       AU1000_PCI_INTB,                                /* au1500 */
+       AU1500_UART3_INT,
+       AU1000_PCI_INTC,                                /* au1500 */
+       AU1000_PCI_INTD,                                /* au1500 */
+       AU1000_DMA_INT_BASE,                            /* FIRST_INT + 6 */
+       /* offsets 7-13 get no enumerator of their own */
+       AU1000_TOY_INT          = AU1500_FIRST_INT + 14,
+       AU1000_TOY_MATCH0_INT,
+       AU1000_TOY_MATCH1_INT,
+       AU1000_TOY_MATCH2_INT,
+       AU1000_RTC_INT,
+       AU1000_RTC_MATCH0_INT,
+       AU1000_RTC_MATCH1_INT,
+       AU1000_RTC_MATCH2_INT,
+       AU1500_PCI_ERR_INT,                             /* FIRST_INT + 22 */
+       AU1000_USB_DEV_REQ_INT  = AU1500_FIRST_INT + 24, /* offset 23 is skipped, as in the old #defines */
+       AU1000_USB_DEV_SUS_INT,
+       AU1000_USB_HOST_INT,
+       AU1000_ACSYNC_INT,
+       AU1500_MAC0_DMA_INT,
+       AU1500_MAC1_DMA_INT,                            /* FIRST_INT + 29 */
+       AU1000_AC97C_INT        = AU1500_FIRST_INT + 31, /* offset 30 is skipped */
+       AU1000_GPIO_0,
+       AU1000_GPIO_1,
+       AU1000_GPIO_2,
+       AU1000_GPIO_3,
+       AU1000_GPIO_4,
+       AU1000_GPIO_5,
+       AU1000_GPIO_6,
+       AU1000_GPIO_7,
+       AU1000_GPIO_8,
+       AU1000_GPIO_9,
+       AU1000_GPIO_10,
+       AU1000_GPIO_11,
+       AU1000_GPIO_12,
+       AU1000_GPIO_13,
+       AU1000_GPIO_14,
+       AU1000_GPIO_15,
+       AU1500_GPIO_200,
+       AU1500_GPIO_201,
+       AU1500_GPIO_202,
+       AU1500_GPIO_203,
+       AU1500_GPIO_20,
+       AU1500_GPIO_204,
+       AU1500_GPIO_205,
+       AU1500_GPIO_23,
+       AU1500_GPIO_24,
+       AU1500_GPIO_25,
+       AU1500_GPIO_26,
+       AU1500_GPIO_27,
+       AU1500_GPIO_28,
+       AU1500_GPIO_206,
+       AU1500_GPIO_207,
+       AU1500_GPIO_208_215,                            /* FIRST_INT + 63 */
+};
 
 /* shortcuts */
 #define INTA AU1000_PCI_INTA
@@ -675,63 +685,67 @@ extern struct au1xxx_irqmap au1xxx_irq_map[];
 
 /* Au1100 */
 #ifdef CONFIG_SOC_AU1100
-#define AU1100_UART0_INT          0
-#define AU1100_UART1_INT          1
-#define AU1100_SD_INT             2
-#define AU1100_UART3_INT          3
-#define AU1000_SSI0_INT           4
-#define AU1000_SSI1_INT           5
-#define AU1000_DMA_INT_BASE       6
-#define AU1000_TOY_INT            14
-#define AU1000_TOY_MATCH0_INT     15
-#define AU1000_TOY_MATCH1_INT     16
-#define AU1000_TOY_MATCH2_INT     17
-#define AU1000_RTC_INT            18
-#define AU1000_RTC_MATCH0_INT     19
-#define AU1000_RTC_MATCH1_INT     20
-#define AU1000_RTC_MATCH2_INT     21
-#define AU1000_IRDA_TX_INT        22
-#define AU1000_IRDA_RX_INT        23
-#define AU1000_USB_DEV_REQ_INT    24
-#define AU1000_USB_DEV_SUS_INT    25
-#define AU1000_USB_HOST_INT       26
-#define AU1000_ACSYNC_INT         27
-#define AU1100_MAC0_DMA_INT       28
-#define        AU1100_GPIO_208_215     29
-#define        AU1100_LCD_INT            30
-#define AU1000_AC97C_INT          31
-#define AU1000_GPIO_0             32
-#define AU1000_GPIO_1             33
-#define AU1000_GPIO_2             34
-#define AU1000_GPIO_3             35
-#define AU1000_GPIO_4             36
-#define AU1000_GPIO_5             37
-#define AU1000_GPIO_6             38
-#define AU1000_GPIO_7             39
-#define AU1000_GPIO_8             40
-#define AU1000_GPIO_9             41
-#define AU1000_GPIO_10            42
-#define AU1000_GPIO_11            43
-#define AU1000_GPIO_12            44
-#define AU1000_GPIO_13            45
-#define AU1000_GPIO_14            46
-#define AU1000_GPIO_15            47
-#define AU1000_GPIO_16            48
-#define AU1000_GPIO_17            49
-#define AU1000_GPIO_18            50
-#define AU1000_GPIO_19            51
-#define AU1000_GPIO_20            52
-#define AU1000_GPIO_21            53
-#define AU1000_GPIO_22            54
-#define AU1000_GPIO_23            55
-#define AU1000_GPIO_24            56
-#define AU1000_GPIO_25            57
-#define AU1000_GPIO_26            58
-#define AU1000_GPIO_27            59
-#define AU1000_GPIO_28            60
-#define AU1000_GPIO_29            61
-#define AU1000_GPIO_30            62
-#define AU1000_GPIO_31            63
+enum soc_au1100_ints {
+       AU1100_FIRST_INT        = MIPS_CPU_IRQ_BASE,    /* numbering starts at the CPU IRQ base */
+       AU1100_UART0_INT        = AU1100_FIRST_INT,     /* offset 0, as in the old #defines */
+       AU1100_UART1_INT,
+       AU1100_SD_INT,
+       AU1100_UART3_INT,
+       AU1000_SSI0_INT,
+       AU1000_SSI1_INT,
+       AU1000_DMA_INT_BASE,                            /* FIRST_INT + 6 */
+       /* offsets 7-13 get no enumerator of their own */
+       AU1000_TOY_INT          = AU1100_FIRST_INT + 14,
+       AU1000_TOY_MATCH0_INT,
+       AU1000_TOY_MATCH1_INT,
+       AU1000_TOY_MATCH2_INT,
+       AU1000_RTC_INT,
+       AU1000_RTC_MATCH0_INT,
+       AU1000_RTC_MATCH1_INT,
+       AU1000_RTC_MATCH2_INT,
+       AU1000_IRDA_TX_INT,
+       AU1000_IRDA_RX_INT,
+       AU1000_USB_DEV_REQ_INT,
+       AU1000_USB_DEV_SUS_INT,
+       AU1000_USB_HOST_INT,
+       AU1000_ACSYNC_INT,
+       AU1100_MAC0_DMA_INT,
+       AU1100_GPIO_208_215,
+       AU1100_LCD_INT,
+       AU1000_AC97C_INT,
+       AU1000_GPIO_0,                                  /* FIRST_INT + 32 */
+       AU1000_GPIO_1,
+       AU1000_GPIO_2,
+       AU1000_GPIO_3,
+       AU1000_GPIO_4,
+       AU1000_GPIO_5,
+       AU1000_GPIO_6,
+       AU1000_GPIO_7,
+       AU1000_GPIO_8,
+       AU1000_GPIO_9,
+       AU1000_GPIO_10,
+       AU1000_GPIO_11,
+       AU1000_GPIO_12,
+       AU1000_GPIO_13,
+       AU1000_GPIO_14,
+       AU1000_GPIO_15,
+       AU1000_GPIO_16,
+       AU1000_GPIO_17,
+       AU1000_GPIO_18,
+       AU1000_GPIO_19,
+       AU1000_GPIO_20,
+       AU1000_GPIO_21,
+       AU1000_GPIO_22,
+       AU1000_GPIO_23,
+       AU1000_GPIO_24,
+       AU1000_GPIO_25,
+       AU1000_GPIO_26,
+       AU1000_GPIO_27,
+       AU1000_GPIO_28,
+       AU1000_GPIO_29,
+       AU1000_GPIO_30,
+       AU1000_GPIO_31,                                 /* FIRST_INT + 63 */
+};
 
 #define UART0_ADDR                0xB1100000
 #define UART1_ADDR                0xB1200000
@@ -746,69 +760,73 @@ extern struct au1xxx_irqmap au1xxx_irq_map[];
 #endif /* CONFIG_SOC_AU1100 */
 
 #ifdef CONFIG_SOC_AU1550
-#define AU1550_UART0_INT          0
-#define AU1550_PCI_INTA           1
-#define AU1550_PCI_INTB           2
-#define AU1550_DDMA_INT           3
-#define AU1550_CRYPTO_INT         4
-#define AU1550_PCI_INTC           5
-#define AU1550_PCI_INTD           6
-#define AU1550_PCI_RST_INT        7
-#define AU1550_UART1_INT          8
-#define AU1550_UART3_INT          9
-#define AU1550_PSC0_INT           10
-#define AU1550_PSC1_INT           11
-#define AU1550_PSC2_INT           12
-#define AU1550_PSC3_INT           13
-#define AU1000_TOY_INT                   14
-#define AU1000_TOY_MATCH0_INT     15
-#define AU1000_TOY_MATCH1_INT     16
-#define AU1000_TOY_MATCH2_INT     17
-#define AU1000_RTC_INT            18
-#define AU1000_RTC_MATCH0_INT     19
-#define AU1000_RTC_MATCH1_INT     20
-#define AU1000_RTC_MATCH2_INT     21
-#define AU1550_NAND_INT           23
-#define AU1550_USB_DEV_REQ_INT    24
-#define AU1550_USB_DEV_SUS_INT    25
-#define AU1550_USB_HOST_INT       26
-#define AU1000_USB_DEV_REQ_INT    AU1550_USB_DEV_REQ_INT
-#define AU1000_USB_DEV_SUS_INT    AU1550_USB_DEV_SUS_INT
-#define AU1000_USB_HOST_INT       AU1550_USB_HOST_INT
-#define AU1550_MAC0_DMA_INT       27
-#define AU1550_MAC1_DMA_INT       28
-#define AU1000_GPIO_0             32
-#define AU1000_GPIO_1             33
-#define AU1000_GPIO_2             34
-#define AU1000_GPIO_3             35
-#define AU1000_GPIO_4             36
-#define AU1000_GPIO_5             37
-#define AU1000_GPIO_6             38
-#define AU1000_GPIO_7             39
-#define AU1000_GPIO_8             40
-#define AU1000_GPIO_9             41
-#define AU1000_GPIO_10            42
-#define AU1000_GPIO_11            43
-#define AU1000_GPIO_12            44
-#define AU1000_GPIO_13            45
-#define AU1000_GPIO_14            46
-#define AU1000_GPIO_15            47
-#define AU1550_GPIO_200           48
-#define AU1500_GPIO_201_205       49   // Logical or of GPIO201:205
-#define AU1500_GPIO_16            50
-#define AU1500_GPIO_17            51
-#define AU1500_GPIO_20            52
-#define AU1500_GPIO_21            53
-#define AU1500_GPIO_22            54
-#define AU1500_GPIO_23            55
-#define AU1500_GPIO_24            56
-#define AU1500_GPIO_25            57
-#define AU1500_GPIO_26            58
-#define AU1500_GPIO_27            59
-#define AU1500_GPIO_28            60
-#define AU1500_GPIO_206           61
-#define AU1500_GPIO_207           62
-#define AU1500_GPIO_208_218       63   // Logical or of GPIO208:218
+enum soc_au1550_ints {
+       AU1550_FIRST_INT        = MIPS_CPU_IRQ_BASE,    /* numbering starts at the CPU IRQ base */
+       AU1550_UART0_INT        = AU1550_FIRST_INT,
+       AU1550_PCI_INTA,
+       AU1550_PCI_INTB,
+       AU1550_DDMA_INT,
+       AU1550_CRYPTO_INT,
+       AU1550_PCI_INTC,
+       AU1550_PCI_INTD,
+       AU1550_PCI_RST_INT,
+       AU1550_UART1_INT,
+       AU1550_UART3_INT,
+       AU1550_PSC0_INT,
+       AU1550_PSC1_INT,
+       AU1550_PSC2_INT,
+       AU1550_PSC3_INT,
+       AU1000_TOY_INT,                                 /* FIRST_INT + 14 */
+       AU1000_TOY_MATCH0_INT,
+       AU1000_TOY_MATCH1_INT,
+       AU1000_TOY_MATCH2_INT,
+       AU1000_RTC_INT,
+       AU1000_RTC_MATCH0_INT,
+       AU1000_RTC_MATCH1_INT,
+       AU1000_RTC_MATCH2_INT,                          /* FIRST_INT + 21 */
+
+       AU1550_NAND_INT                 = AU1550_FIRST_INT + 23, /* offset 22 is skipped */
+       AU1550_USB_DEV_REQ_INT,
+       AU1000_USB_DEV_REQ_INT          = AU1550_USB_DEV_REQ_INT, /* alias, same value */
+       AU1550_USB_DEV_SUS_INT,
+       AU1000_USB_DEV_SUS_INT          = AU1550_USB_DEV_SUS_INT, /* alias, same value */
+       AU1550_USB_HOST_INT,
+       AU1000_USB_HOST_INT             = AU1550_USB_HOST_INT, /* alias, same value */
+       AU1550_MAC0_DMA_INT,
+       AU1550_MAC1_DMA_INT,                            /* FIRST_INT + 28 */
+       AU1000_GPIO_0                   = AU1550_FIRST_INT + 32, /* offsets 29-31 are skipped */
+       AU1000_GPIO_1,
+       AU1000_GPIO_2,
+       AU1000_GPIO_3,
+       AU1000_GPIO_4,
+       AU1000_GPIO_5,
+       AU1000_GPIO_6,
+       AU1000_GPIO_7,
+       AU1000_GPIO_8,
+       AU1000_GPIO_9,
+       AU1000_GPIO_10,
+       AU1000_GPIO_11,
+       AU1000_GPIO_12,
+       AU1000_GPIO_13,
+       AU1000_GPIO_14,
+       AU1000_GPIO_15,
+       AU1550_GPIO_200,
+       AU1500_GPIO_201_205,                    /* Logical or of GPIO201:205 */
+       AU1500_GPIO_16,
+       AU1500_GPIO_17,
+       AU1500_GPIO_20,
+       AU1500_GPIO_21,
+       AU1500_GPIO_22,
+       AU1500_GPIO_23,
+       AU1500_GPIO_24,
+       AU1500_GPIO_25,
+       AU1500_GPIO_26,
+       AU1500_GPIO_27,
+       AU1500_GPIO_28,
+       AU1500_GPIO_206,
+       AU1500_GPIO_207,
+       AU1500_GPIO_208_218,                    /* Logical or of GPIO208:218 */
+};
 
 /* shortcuts */
 #define INTA AU1550_PCI_INTA
@@ -832,70 +850,74 @@ extern struct au1xxx_irqmap au1xxx_irq_map[];
 #endif /* CONFIG_SOC_AU1550 */
 
 #ifdef CONFIG_SOC_AU1200
-#define AU1200_UART0_INT          0
-#define AU1200_SWT_INT            1
-#define AU1200_SD_INT             2
-#define AU1200_DDMA_INT           3
-#define AU1200_MAE_BE_INT         4
-#define AU1200_GPIO_200           5
-#define AU1200_GPIO_201           6
-#define AU1200_GPIO_202           7
-#define AU1200_UART1_INT          8
-#define AU1200_MAE_FE_INT         9
-#define AU1200_PSC0_INT           10
-#define AU1200_PSC1_INT           11
-#define AU1200_AES_INT            12
-#define AU1200_CAMERA_INT         13
-#define AU1000_TOY_INT                   14
-#define AU1000_TOY_MATCH0_INT     15
-#define AU1000_TOY_MATCH1_INT     16
-#define AU1000_TOY_MATCH2_INT     17
-#define AU1000_RTC_INT            18
-#define AU1000_RTC_MATCH0_INT     19
-#define AU1000_RTC_MATCH1_INT     20
-#define AU1000_RTC_MATCH2_INT     21
-#define AU1200_NAND_INT           23
-#define AU1200_GPIO_204           24
-#define AU1200_GPIO_205           25
-#define AU1200_GPIO_206           26
-#define AU1200_GPIO_207           27
-#define AU1200_GPIO_208_215       28 // Logical OR of 208:215
-#define AU1200_USB_INT            29
-#define AU1000_USB_HOST_INT              AU1200_USB_INT
-#define AU1200_LCD_INT            30
-#define AU1200_MAE_BOTH_INT       31
-#define AU1000_GPIO_0             32
-#define AU1000_GPIO_1             33
-#define AU1000_GPIO_2             34
-#define AU1000_GPIO_3             35
-#define AU1000_GPIO_4             36
-#define AU1000_GPIO_5             37
-#define AU1000_GPIO_6             38
-#define AU1000_GPIO_7             39
-#define AU1000_GPIO_8             40
-#define AU1000_GPIO_9             41
-#define AU1000_GPIO_10            42
-#define AU1000_GPIO_11            43
-#define AU1000_GPIO_12            44
-#define AU1000_GPIO_13            45
-#define AU1000_GPIO_14            46
-#define AU1000_GPIO_15            47
-#define AU1000_GPIO_16            48
-#define AU1000_GPIO_17            49
-#define AU1000_GPIO_18            50
-#define AU1000_GPIO_19            51
-#define AU1000_GPIO_20            52
-#define AU1000_GPIO_21            53
-#define AU1000_GPIO_22            54
-#define AU1000_GPIO_23            55
-#define AU1000_GPIO_24            56
-#define AU1000_GPIO_25            57
-#define AU1000_GPIO_26            58
-#define AU1000_GPIO_27            59
-#define AU1000_GPIO_28            60
-#define AU1000_GPIO_29            61
-#define AU1000_GPIO_30            62
-#define AU1000_GPIO_31            63
+enum soc_au1200_ints {
+       AU1200_FIRST_INT        = MIPS_CPU_IRQ_BASE,
+       AU1200_UART0_INT        = AU1200_FIRST_INT,
+       AU1200_SWT_INT,
+       AU1200_SD_INT,
+       AU1200_DDMA_INT,
+       AU1200_MAE_BE_INT,
+       AU1200_GPIO_200,
+       AU1200_GPIO_201,
+       AU1200_GPIO_202,
+       AU1200_UART1_INT,
+       AU1200_MAE_FE_INT,
+       AU1200_PSC0_INT,
+       AU1200_PSC1_INT,
+       AU1200_AES_INT,
+       AU1200_CAMERA_INT,
+       AU1000_TOY_INT,
+       AU1000_TOY_MATCH0_INT,
+       AU1000_TOY_MATCH1_INT,
+       AU1000_TOY_MATCH2_INT,
+       AU1000_RTC_INT,
+       AU1000_RTC_MATCH0_INT,
+       AU1000_RTC_MATCH1_INT,
+       AU1000_RTC_MATCH2_INT,
+
+       AU1200_NAND_INT         = AU1200_FIRST_INT + 23,
+       AU1200_GPIO_204,
+       AU1200_GPIO_205,
+       AU1200_GPIO_206,
+       AU1200_GPIO_207,
+       AU1200_GPIO_208_215,                    /* Logical OR of 208:215 */
+       AU1200_USB_INT,
+       AU1000_USB_HOST_INT     = AU1200_USB_INT,
+       AU1200_LCD_INT,
+       AU1200_MAE_BOTH_INT,
+       AU1000_GPIO_0,
+       AU1000_GPIO_1,
+       AU1000_GPIO_2,
+       AU1000_GPIO_3,
+       AU1000_GPIO_4,
+       AU1000_GPIO_5,
+       AU1000_GPIO_6,
+       AU1000_GPIO_7,
+       AU1000_GPIO_8,
+       AU1000_GPIO_9,
+       AU1000_GPIO_10,
+       AU1000_GPIO_11,
+       AU1000_GPIO_12,
+       AU1000_GPIO_13,
+       AU1000_GPIO_14,
+       AU1000_GPIO_15,
+       AU1000_GPIO_16,
+       AU1000_GPIO_17,
+       AU1000_GPIO_18,
+       AU1000_GPIO_19,
+       AU1000_GPIO_20,
+       AU1000_GPIO_21,
+       AU1000_GPIO_22,
+       AU1000_GPIO_23,
+       AU1000_GPIO_24,
+       AU1000_GPIO_25,
+       AU1000_GPIO_26,
+       AU1000_GPIO_27,
+       AU1000_GPIO_28,
+       AU1000_GPIO_29,
+       AU1000_GPIO_30,
+       AU1000_GPIO_31,
+};
 
 #define UART0_ADDR                0xB1100000
 #define UART1_ADDR                0xB1200000
@@ -926,10 +948,12 @@ extern struct au1xxx_irqmap au1xxx_irq_map[];
 
 #endif /* CONFIG_SOC_AU1200 */
 
-#define AU1000_LAST_INTC0_INT     31
-#define AU1000_LAST_INTC1_INT     63
-#define AU1000_MAX_INTR           63
-#define INTX                   0xFF /* not valid */
+#define AU1000_INTC0_INT_BASE  (MIPS_CPU_IRQ_BASE + 0)
+#define AU1000_INTC0_INT_LAST  (MIPS_CPU_IRQ_BASE + 31)
+#define AU1000_INTC1_INT_BASE  (MIPS_CPU_IRQ_BASE + 32)
+#define AU1000_INTC1_INT_LAST  (MIPS_CPU_IRQ_BASE + 63)
+#define AU1000_MAX_INTR                (MIPS_CPU_IRQ_BASE + 63)
+#define INTX                   0xFF                    /* not valid */
 
 /* Programmable Counters 0 and 1 */
 #define SYS_BASE                   0xB1900000
index 647fdb5..050eae8 100644 (file)
@@ -181,29 +181,34 @@ static BCSR * const bcsr = (BCSR *)BCSR_KSEG1_ADDR;
 #define NAND_PHYS_ADDR   0x20000000
 
 /*
- *     External Interrupts for Pb1200 as of 8/6/2004.
- *   Bit positions in the CPLD registers can be calculated by taking
- *   the interrupt define and subtracting the DB1200_INT_BEGIN value.
- *    *example: IDE bis pos is  = 64 - 64
-                ETH bit pos is  = 65 - 64
+ * External Interrupts for Pb1200 as of 8/6/2004.
+ * Bit positions in the CPLD registers can be calculated by taking
+ * the interrupt define and subtracting the DB1200_INT_BEGIN value.
+ *
+ *   Example: IDE bit pos is  = 64 - 64
+ *            ETH bit pos is  = 65 - 64
  */
-#define DB1200_INT_BEGIN               (AU1000_LAST_INTC1_INT + 1)
-#define DB1200_IDE_INT                 (DB1200_INT_BEGIN + 0)
-#define DB1200_ETH_INT                 (DB1200_INT_BEGIN + 1)
-#define DB1200_PC0_INT                 (DB1200_INT_BEGIN + 2)
-#define DB1200_PC0_STSCHG_INT  (DB1200_INT_BEGIN + 3)
-#define DB1200_PC1_INT                 (DB1200_INT_BEGIN + 4)
-#define DB1200_PC1_STSCHG_INT  (DB1200_INT_BEGIN + 5)
-#define DB1200_DC_INT                  (DB1200_INT_BEGIN + 6)
-#define DB1200_FLASHBUSY_INT   (DB1200_INT_BEGIN + 7)
-#define DB1200_PC0_INSERT_INT  (DB1200_INT_BEGIN + 8)
-#define DB1200_PC0_EJECT_INT   (DB1200_INT_BEGIN + 9)
-#define DB1200_PC1_INSERT_INT  (DB1200_INT_BEGIN + 10)
-#define DB1200_PC1_EJECT_INT   (DB1200_INT_BEGIN + 11)
-#define DB1200_SD0_INSERT_INT  (DB1200_INT_BEGIN + 12)
-#define DB1200_SD0_EJECT_INT   (DB1200_INT_BEGIN + 13)
-
-#define DB1200_INT_END                 (DB1200_INT_BEGIN + 15)
+enum external_pb1200_ints {
+       DB1200_INT_BEGIN        = AU1000_MAX_INTR + 1,
+
+       DB1200_IDE_INT          = DB1200_INT_BEGIN,
+       DB1200_ETH_INT,
+       DB1200_PC0_INT,
+       DB1200_PC0_STSCHG_INT,
+       DB1200_PC1_INT,
+       DB1200_PC1_STSCHG_INT,
+       DB1200_DC_INT,
+       DB1200_FLASHBUSY_INT,
+       DB1200_PC0_INSERT_INT,
+       DB1200_PC0_EJECT_INT,
+       DB1200_PC1_INSERT_INT,
+       DB1200_PC1_EJECT_INT,
+       DB1200_SD0_INSERT_INT,
+       DB1200_SD0_EJECT_INT,
+
+       DB1200_INT_END          = DB1200_INT_BEGIN + 15,
+};
+
 
 /* For drivers/pcmcia/au1000_db1x00.c */
 
index 409d443..d9f384a 100644 (file)
@@ -217,31 +217,35 @@ static BCSR * const bcsr = (BCSR *)BCSR_KSEG1_ADDR;
 
 
 /*
- *     External Interrupts for Pb1200 as of 8/6/2004.
- *   Bit positions in the CPLD registers can be calculated by taking
- *   the interrupt define and subtracting the PB1200_INT_BEGIN value.
- *    *example: IDE bis pos is  = 64 - 64
-                ETH bit pos is  = 65 - 64
+ * External Interrupts for Pb1200 as of 8/6/2004.
+ * Bit positions in the CPLD registers can be calculated by taking
+ * the interrupt define and subtracting the PB1200_INT_BEGIN value.
+ *
+ *   Example: IDE bit pos is  = 64 - 64
+ *            ETH bit pos is  = 65 - 64
  */
-#define PB1200_INT_BEGIN               (AU1000_LAST_INTC1_INT + 1)
-#define PB1200_IDE_INT                 (PB1200_INT_BEGIN + 0)
-#define PB1200_ETH_INT                 (PB1200_INT_BEGIN + 1)
-#define PB1200_PC0_INT                 (PB1200_INT_BEGIN + 2)
-#define PB1200_PC0_STSCHG_INT  (PB1200_INT_BEGIN + 3)
-#define PB1200_PC1_INT                 (PB1200_INT_BEGIN + 4)
-#define PB1200_PC1_STSCHG_INT  (PB1200_INT_BEGIN + 5)
-#define PB1200_DC_INT                  (PB1200_INT_BEGIN + 6)
-#define PB1200_FLASHBUSY_INT   (PB1200_INT_BEGIN + 7)
-#define PB1200_PC0_INSERT_INT  (PB1200_INT_BEGIN + 8)
-#define PB1200_PC0_EJECT_INT   (PB1200_INT_BEGIN + 9)
-#define PB1200_PC1_INSERT_INT  (PB1200_INT_BEGIN + 10)
-#define PB1200_PC1_EJECT_INT   (PB1200_INT_BEGIN + 11)
-#define PB1200_SD0_INSERT_INT  (PB1200_INT_BEGIN + 12)
-#define PB1200_SD0_EJECT_INT   (PB1200_INT_BEGIN + 13)
-#define PB1200_SD1_INSERT_INT  (PB1200_INT_BEGIN + 14)
-#define PB1200_SD1_EJECT_INT   (PB1200_INT_BEGIN + 15)
-
-#define PB1200_INT_END                 (PB1200_INT_BEGIN + 15)
+enum external_pb1200_ints {
+       PB1200_INT_BEGIN        = AU1000_MAX_INTR + 1,
+
+       PB1200_IDE_INT          = PB1200_INT_BEGIN,
+       PB1200_ETH_INT,
+       PB1200_PC0_INT,
+       PB1200_PC0_STSCHG_INT,
+       PB1200_PC1_INT,
+       PB1200_PC1_STSCHG_INT,
+       PB1200_DC_INT,
+       PB1200_FLASHBUSY_INT,
+       PB1200_PC0_INSERT_INT,
+       PB1200_PC0_EJECT_INT,
+       PB1200_PC1_INSERT_INT,
+       PB1200_PC1_EJECT_INT,
+       PB1200_SD0_INSERT_INT,
+       PB1200_SD0_EJECT_INT,
+       PB1200_SD1_INSERT_INT,
+       PB1200_SD1_EJECT_INT,
+
+       PB1200_INT_END          = PB1200_INT_BEGIN + 15,        /* same value as PB1200_SD1_EJECT_INT */
+};
 
 /* For drivers/pcmcia/au1000_db1x00.c */
 #define BOARD_PC0_INT PB1200_PC0_INT
index 9fa3fa9..f59d370 100644 (file)
@@ -25,27 +25,22 @@ struct tss_struct;
 struct mm_struct;
 struct desc_struct;
 
-/* Lazy mode for batching updates / context switch */
-enum paravirt_lazy_mode {
-       PARAVIRT_LAZY_NONE = 0,
-       PARAVIRT_LAZY_MMU = 1,
-       PARAVIRT_LAZY_CPU = 2,
-       PARAVIRT_LAZY_FLUSH = 3,
-};
-
-struct paravirt_ops
-{
+/* general info */
+struct pv_info {
        unsigned int kernel_rpl;
        int shared_kernel_pmd;
-       int paravirt_enabled;
+       int paravirt_enabled;
        const char *name;
+};
 
+struct pv_init_ops {
        /*
-        * Patch may replace one of the defined code sequences with arbitrary
-        * code, subject to the same register constraints.  This generally
-        * means the code is not free to clobber any registers other than EAX.
-        * The patch function should return the number of bytes of code
-        * generated, as we nop pad the rest in generic code.
+        * Patch may replace one of the defined code sequences with
+        * arbitrary code, subject to the same register constraints.
+        * This generally means the code is not free to clobber any
+        * registers other than EAX.  The patch function should return
+        * the number of bytes of code generated, as we nop pad the
+        * rest in generic code.
         */
        unsigned (*patch)(u8 type, u16 clobber, void *insnbuf,
                          unsigned long addr, unsigned len);
@@ -55,29 +50,29 @@ struct paravirt_ops
        char *(*memory_setup)(void);
        void (*post_allocator_init)(void);
 
-       void (*init_IRQ)(void);
-       void (*time_init)(void);
-
-       /*
-        * Called before/after init_mm pagetable setup. setup_start
-        * may reset %cr3, and may pre-install parts of the pagetable;
-        * pagetable setup is expected to preserve any existing
-        * mapping.
-        */
-       void (*pagetable_setup_start)(pgd_t *pgd_base);
-       void (*pagetable_setup_done)(pgd_t *pgd_base);
-
        /* Print a banner to identify the environment */
        void (*banner)(void);
+};
+
+
+struct pv_lazy_ops {
+       /* Set deferred update mode, used for batching operations. */
+       void (*enter)(void);
+       void (*leave)(void);
+};
+
+struct pv_time_ops {
+       void (*time_init)(void);
 
        /* Set and set time of day */
        unsigned long (*get_wallclock)(void);
        int (*set_wallclock)(unsigned long);
 
-       /* cpuid emulation, mostly so that caps bits can be disabled */
-       void (*cpuid)(unsigned int *eax, unsigned int *ebx,
-                     unsigned int *ecx, unsigned int *edx);
+       unsigned long long (*sched_clock)(void);
+       unsigned long (*get_cpu_khz)(void);
+};
 
+struct pv_cpu_ops {
        /* hooks for various privileged instructions */
        unsigned long (*get_debugreg)(int regno);
        void (*set_debugreg)(int regno, unsigned long value);
@@ -87,41 +82,10 @@ struct paravirt_ops
        unsigned long (*read_cr0)(void);
        void (*write_cr0)(unsigned long);
 
-       unsigned long (*read_cr2)(void);
-       void (*write_cr2)(unsigned long);
-
-       unsigned long (*read_cr3)(void);
-       void (*write_cr3)(unsigned long);
-
        unsigned long (*read_cr4_safe)(void);
        unsigned long (*read_cr4)(void);
        void (*write_cr4)(unsigned long);
 
-       /*
-        * Get/set interrupt state.  save_fl and restore_fl are only
-        * expected to use X86_EFLAGS_IF; all other bits
-        * returned from save_fl are undefined, and may be ignored by
-        * restore_fl.
-        */
-       unsigned long (*save_fl)(void);
-       void (*restore_fl)(unsigned long);
-       void (*irq_disable)(void);
-       void (*irq_enable)(void);
-       void (*safe_halt)(void);
-       void (*halt)(void);
-
-       void (*wbinvd)(void);
-
-       /* MSR, PMC and TSR operations.
-          err = 0/-EFAULT.  wrmsr returns 0/-EFAULT. */
-       u64 (*read_msr)(unsigned int msr, int *err);
-       int (*write_msr)(unsigned int msr, u64 val);
-
-       u64 (*read_tsc)(void);
-       u64 (*read_pmc)(void);
-       unsigned long long (*sched_clock)(void);
-       unsigned long (*get_cpu_khz)(void);
-
        /* Segment descriptor handling */
        void (*load_tr_desc)(void);
        void (*load_gdt)(const struct Xgt_desc_struct *);
@@ -140,18 +104,47 @@ struct paravirt_ops
        void (*load_esp0)(struct tss_struct *tss, struct thread_struct *t);
 
        void (*set_iopl_mask)(unsigned mask);
+
+       void (*wbinvd)(void);
        void (*io_delay)(void);
 
+       /* cpuid emulation, mostly so that caps bits can be disabled */
+       void (*cpuid)(unsigned int *eax, unsigned int *ebx,
+                     unsigned int *ecx, unsigned int *edx);
+
+       /* MSR, PMC and TSC operations.
+          err = 0/-EFAULT.  wrmsr returns 0/-EFAULT. */
+       u64 (*read_msr)(unsigned int msr, int *err);
+       int (*write_msr)(unsigned int msr, u64 val);
+
+       u64 (*read_tsc)(void);
+       u64 (*read_pmc)(void);
+
+       /* These two are jmp to, not actually called. */
+       void (*irq_enable_sysexit)(void);
+       void (*iret)(void);
+
+       struct pv_lazy_ops lazy_mode;
+};
+
+struct pv_irq_ops {
+       void (*init_IRQ)(void);
+
        /*
-        * Hooks for intercepting the creation/use/destruction of an
-        * mm_struct.
+        * Get/set interrupt state.  save_fl and restore_fl are only
+        * expected to use X86_EFLAGS_IF; all other bits
+        * returned from save_fl are undefined, and may be ignored by
+        * restore_fl.
         */
-       void (*activate_mm)(struct mm_struct *prev,
-                           struct mm_struct *next);
-       void (*dup_mmap)(struct mm_struct *oldmm,
-                        struct mm_struct *mm);
-       void (*exit_mmap)(struct mm_struct *mm);
+       unsigned long (*save_fl)(void);
+       void (*restore_fl)(unsigned long);
+       void (*irq_disable)(void);
+       void (*irq_enable)(void);
+       void (*safe_halt)(void);
+       void (*halt)(void);
+};
 
+struct pv_apic_ops {
 #ifdef CONFIG_X86_LOCAL_APIC
        /*
         * Direct APIC operations, principally for VMI.  Ideally
@@ -167,6 +160,34 @@ struct paravirt_ops
                                 unsigned long start_eip,
                                 unsigned long start_esp);
 #endif
+};
+
+struct pv_mmu_ops {
+       /*
+        * Called before/after init_mm pagetable setup. setup_start
+        * may reset %cr3, and may pre-install parts of the pagetable;
+        * pagetable setup is expected to preserve any existing
+        * mapping.
+        */
+       void (*pagetable_setup_start)(pgd_t *pgd_base);
+       void (*pagetable_setup_done)(pgd_t *pgd_base);
+
+       unsigned long (*read_cr2)(void);
+       void (*write_cr2)(unsigned long);
+
+       unsigned long (*read_cr3)(void);
+       void (*write_cr3)(unsigned long);
+
+       /*
+        * Hooks for intercepting the creation/use/destruction of an
+        * mm_struct.
+        */
+       void (*activate_mm)(struct mm_struct *prev,
+                           struct mm_struct *next);
+       void (*dup_mmap)(struct mm_struct *oldmm,
+                        struct mm_struct *mm);
+       void (*exit_mmap)(struct mm_struct *mm);
+
 
        /* TLB operations */
        void (*flush_tlb_user)(void);
@@ -191,15 +212,12 @@ struct paravirt_ops
        void (*pte_update_defer)(struct mm_struct *mm,
                                 unsigned long addr, pte_t *ptep);
 
-#ifdef CONFIG_HIGHPTE
-       void *(*kmap_atomic_pte)(struct page *page, enum km_type type);
-#endif
-
 #ifdef CONFIG_X86_PAE
        void (*set_pte_atomic)(pte_t *ptep, pte_t pteval);
-       void (*set_pte_present)(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte);
+       void (*set_pte_present)(struct mm_struct *mm, unsigned long addr,
+                               pte_t *ptep, pte_t pte);
        void (*set_pud)(pud_t *pudp, pud_t pudval);
-       void (*pte_clear)(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
+       void (*pte_clear)(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
        void (*pmd_clear)(pmd_t *pmdp);
 
        unsigned long long (*pte_val)(pte_t);
@@ -217,21 +235,40 @@ struct paravirt_ops
        pgd_t (*make_pgd)(unsigned long pgd);
 #endif
 
-       /* Set deferred update mode, used for batching operations. */
-       void (*set_lazy_mode)(enum paravirt_lazy_mode mode);
+#ifdef CONFIG_HIGHPTE
+       void *(*kmap_atomic_pte)(struct page *page, enum km_type type);
+#endif
 
-       /* These two are jmp to, not actually called. */
-       void (*irq_enable_sysexit)(void);
-       void (*iret)(void);
+       struct pv_lazy_ops lazy_mode;
 };
 
-extern struct paravirt_ops paravirt_ops;
+/* This contains all the paravirt structures: we get a convenient
+ * number for each function using the offset which we use to indicate
+ * what to patch. */
+struct paravirt_patch_template
+{
+       struct pv_init_ops pv_init_ops;
+       struct pv_time_ops pv_time_ops;
+       struct pv_cpu_ops pv_cpu_ops;
+       struct pv_irq_ops pv_irq_ops;
+       struct pv_apic_ops pv_apic_ops;
+       struct pv_mmu_ops pv_mmu_ops;
+};
+
+extern struct pv_info pv_info;
+extern struct pv_init_ops pv_init_ops;
+extern struct pv_time_ops pv_time_ops;
+extern struct pv_cpu_ops pv_cpu_ops;
+extern struct pv_irq_ops pv_irq_ops;
+extern struct pv_apic_ops pv_apic_ops;
+extern struct pv_mmu_ops pv_mmu_ops;
 
 #define PARAVIRT_PATCH(x)                                      \
-       (offsetof(struct paravirt_ops, x) / sizeof(void *))
+       (offsetof(struct paravirt_patch_template, x) / sizeof(void *))
 
-#define paravirt_type(type)                                    \
-       [paravirt_typenum] "i" (PARAVIRT_PATCH(type))
+#define paravirt_type(op)                              \
+       [paravirt_typenum] "i" (PARAVIRT_PATCH(op)),    \
+       [paravirt_opptr] "m" (op)
 #define paravirt_clobber(clobber)              \
        [paravirt_clobber] "i" (clobber)
 
@@ -258,7 +295,7 @@ unsigned paravirt_patch_call(void *insnbuf,
                             const void *target, u16 tgt_clobbers,
                             unsigned long addr, u16 site_clobbers,
                             unsigned len);
-unsigned paravirt_patch_jmp(const void *target, void *insnbuf,
+unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
                            unsigned long addr, unsigned len);
 unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
                                unsigned long addr, unsigned len);
@@ -271,14 +308,14 @@ int paravirt_disable_iospace(void);
 /*
  * This generates an indirect call based on the operation type number.
  * The type number, computed in PARAVIRT_PATCH, is derived from the
- * offset into the paravirt_ops structure, and can therefore be freely
- * converted back into a structure offset.
+ * offset into the paravirt_patch_template structure, and can therefore be
+ * freely converted back into a structure offset.
  */
-#define PARAVIRT_CALL  "call *(paravirt_ops+%c[paravirt_typenum]*4);"
+#define PARAVIRT_CALL  "call *%[paravirt_opptr];"
 
 /*
- * These macros are intended to wrap calls into a paravirt_ops
- * operation, so that they can be later identified and patched at
+ * These macros are intended to wrap calls through one of the paravirt
+ * ops structs, so that they can be later identified and patched at
  * runtime.
  *
  * Normally, a call to a pv_op function is a simple indirect call:
@@ -301,7 +338,7 @@ int paravirt_disable_iospace(void);
  * The call instruction itself is marked by placing its start address
  * and size into the .parainstructions section, so that
  * apply_paravirt() in arch/i386/kernel/alternative.c can do the
- * appropriate patching under the control of the backend paravirt_ops
+ * appropriate patching under the control of the backend pv_init_ops
  * implementation.
  *
  * Unfortunately there's no way to get gcc to generate the args setup
@@ -409,36 +446,36 @@ int paravirt_disable_iospace(void);
 
 static inline int paravirt_enabled(void)
 {
-       return paravirt_ops.paravirt_enabled;
+       return pv_info.paravirt_enabled;
 }
 
 static inline void load_esp0(struct tss_struct *tss,
                             struct thread_struct *thread)
 {
-       PVOP_VCALL2(load_esp0, tss, thread);
+       PVOP_VCALL2(pv_cpu_ops.load_esp0, tss, thread);
 }
 
-#define ARCH_SETUP                     paravirt_ops.arch_setup();
+#define ARCH_SETUP                     pv_init_ops.arch_setup();
 static inline unsigned long get_wallclock(void)
 {
-       return PVOP_CALL0(unsigned long, get_wallclock);
+       return PVOP_CALL0(unsigned long, pv_time_ops.get_wallclock);
 }
 
 static inline int set_wallclock(unsigned long nowtime)
 {
-       return PVOP_CALL1(int, set_wallclock, nowtime);
+       return PVOP_CALL1(int, pv_time_ops.set_wallclock, nowtime);
 }
 
 static inline void (*choose_time_init(void))(void)
 {
-       return paravirt_ops.time_init;
+       return pv_time_ops.time_init;
 }
 
 /* The paravirtualized CPUID instruction. */
 static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
                           unsigned int *ecx, unsigned int *edx)
 {
-       PVOP_VCALL4(cpuid, eax, ebx, ecx, edx);
+       PVOP_VCALL4(pv_cpu_ops.cpuid, eax, ebx, ecx, edx);
 }
 
 /*
@@ -446,87 +483,87 @@ static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
  */
 static inline unsigned long paravirt_get_debugreg(int reg)
 {
-       return PVOP_CALL1(unsigned long, get_debugreg, reg);
+       return PVOP_CALL1(unsigned long, pv_cpu_ops.get_debugreg, reg);
 }
 #define get_debugreg(var, reg) var = paravirt_get_debugreg(reg)
 static inline void set_debugreg(unsigned long val, int reg)
 {
-       PVOP_VCALL2(set_debugreg, reg, val);
+       PVOP_VCALL2(pv_cpu_ops.set_debugreg, reg, val);
 }
 
 static inline void clts(void)
 {
-       PVOP_VCALL0(clts);
+       PVOP_VCALL0(pv_cpu_ops.clts);
 }
 
 static inline unsigned long read_cr0(void)
 {
-       return PVOP_CALL0(unsigned long, read_cr0);
+       return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr0);
 }
 
 static inline void write_cr0(unsigned long x)
 {
-       PVOP_VCALL1(write_cr0, x);
+       PVOP_VCALL1(pv_cpu_ops.write_cr0, x);
 }
 
 static inline unsigned long read_cr2(void)
 {
-       return PVOP_CALL0(unsigned long, read_cr2);
+       return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr2);
 }
 
 static inline void write_cr2(unsigned long x)
 {
-       PVOP_VCALL1(write_cr2, x);
+       PVOP_VCALL1(pv_mmu_ops.write_cr2, x);
 }
 
 static inline unsigned long read_cr3(void)
 {
-       return PVOP_CALL0(unsigned long, read_cr3);
+       return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr3);
 }
 
 static inline void write_cr3(unsigned long x)
 {
-       PVOP_VCALL1(write_cr3, x);
+       PVOP_VCALL1(pv_mmu_ops.write_cr3, x);
 }
 
 static inline unsigned long read_cr4(void)
 {
-       return PVOP_CALL0(unsigned long, read_cr4);
+       return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4);
 }
 static inline unsigned long read_cr4_safe(void)
 {
-       return PVOP_CALL0(unsigned long, read_cr4_safe);
+       return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4_safe);
 }
 
 static inline void write_cr4(unsigned long x)
 {
-       PVOP_VCALL1(write_cr4, x);
+       PVOP_VCALL1(pv_cpu_ops.write_cr4, x);
 }
 
 static inline void raw_safe_halt(void)
 {
-       PVOP_VCALL0(safe_halt);
+       PVOP_VCALL0(pv_irq_ops.safe_halt);
 }
 
 static inline void halt(void)
 {
-       PVOP_VCALL0(safe_halt);
+       PVOP_VCALL0(pv_irq_ops.safe_halt);
 }
 
 static inline void wbinvd(void)
 {
-       PVOP_VCALL0(wbinvd);
+       PVOP_VCALL0(pv_cpu_ops.wbinvd);
 }
 
-#define get_kernel_rpl()  (paravirt_ops.kernel_rpl)
+#define get_kernel_rpl()  (pv_info.kernel_rpl)
 
 static inline u64 paravirt_read_msr(unsigned msr, int *err)
 {
-       return PVOP_CALL2(u64, read_msr, msr, err);
+       return PVOP_CALL2(u64, pv_cpu_ops.read_msr, msr, err);
 }
 static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high)
 {
-       return PVOP_CALL3(int, write_msr, msr, low, high);
+       return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high);
 }
 
 /* These should all do BUG_ON(_err), but our headers are too tangled. */
@@ -560,7 +597,7 @@ static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high)
 
 static inline u64 paravirt_read_tsc(void)
 {
-       return PVOP_CALL0(u64, read_tsc);
+       return PVOP_CALL0(u64, pv_cpu_ops.read_tsc);
 }
 
 #define rdtscl(low) do {                       \
@@ -572,15 +609,15 @@ static inline u64 paravirt_read_tsc(void)
 
 static inline unsigned long long paravirt_sched_clock(void)
 {
-       return PVOP_CALL0(unsigned long long, sched_clock);
+       return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock);
 }
-#define calculate_cpu_khz() (paravirt_ops.get_cpu_khz())
+#define calculate_cpu_khz() (pv_time_ops.get_cpu_khz())
 
 #define write_tsc(val1,val2) wrmsr(0x10, val1, val2)
 
 static inline unsigned long long paravirt_read_pmc(int counter)
 {
-       return PVOP_CALL1(u64, read_pmc, counter);
+       return PVOP_CALL1(u64, pv_cpu_ops.read_pmc, counter);
 }
 
 #define rdpmc(counter,low,high) do {           \
@@ -591,61 +628,61 @@ static inline unsigned long long paravirt_read_pmc(int counter)
 
 static inline void load_TR_desc(void)
 {
-       PVOP_VCALL0(load_tr_desc);
+       PVOP_VCALL0(pv_cpu_ops.load_tr_desc);
 }
 static inline void load_gdt(const struct Xgt_desc_struct *dtr)
 {
-       PVOP_VCALL1(load_gdt, dtr);
+       PVOP_VCALL1(pv_cpu_ops.load_gdt, dtr);
 }
 static inline void load_idt(const struct Xgt_desc_struct *dtr)
 {
-       PVOP_VCALL1(load_idt, dtr);
+       PVOP_VCALL1(pv_cpu_ops.load_idt, dtr);
 }
 static inline void set_ldt(const void *addr, unsigned entries)
 {
-       PVOP_VCALL2(set_ldt, addr, entries);
+       PVOP_VCALL2(pv_cpu_ops.set_ldt, addr, entries);
 }
 static inline void store_gdt(struct Xgt_desc_struct *dtr)
 {
-       PVOP_VCALL1(store_gdt, dtr);
+       PVOP_VCALL1(pv_cpu_ops.store_gdt, dtr);
 }
 static inline void store_idt(struct Xgt_desc_struct *dtr)
 {
-       PVOP_VCALL1(store_idt, dtr);
+       PVOP_VCALL1(pv_cpu_ops.store_idt, dtr);
 }
 static inline unsigned long paravirt_store_tr(void)
 {
-       return PVOP_CALL0(unsigned long, store_tr);
+       return PVOP_CALL0(unsigned long, pv_cpu_ops.store_tr);
 }
 #define store_tr(tr)   ((tr) = paravirt_store_tr())
 static inline void load_TLS(struct thread_struct *t, unsigned cpu)
 {
-       PVOP_VCALL2(load_tls, t, cpu);
+       PVOP_VCALL2(pv_cpu_ops.load_tls, t, cpu);
 }
 static inline void write_ldt_entry(void *dt, int entry, u32 low, u32 high)
 {
-       PVOP_VCALL4(write_ldt_entry, dt, entry, low, high);
+       PVOP_VCALL4(pv_cpu_ops.write_ldt_entry, dt, entry, low, high);
 }
 static inline void write_gdt_entry(void *dt, int entry, u32 low, u32 high)
 {
-       PVOP_VCALL4(write_gdt_entry, dt, entry, low, high);
+       PVOP_VCALL4(pv_cpu_ops.write_gdt_entry, dt, entry, low, high);
 }
 static inline void write_idt_entry(void *dt, int entry, u32 low, u32 high)
 {
-       PVOP_VCALL4(write_idt_entry, dt, entry, low, high);
+       PVOP_VCALL4(pv_cpu_ops.write_idt_entry, dt, entry, low, high);
 }
 static inline void set_iopl_mask(unsigned mask)
 {
-       PVOP_VCALL1(set_iopl_mask, mask);
+       PVOP_VCALL1(pv_cpu_ops.set_iopl_mask, mask);
 }
 
 /* The paravirtualized I/O functions */
 static inline void slow_down_io(void) {
-       paravirt_ops.io_delay();
+       pv_cpu_ops.io_delay();
 #ifdef REALLY_SLOW_IO
-       paravirt_ops.io_delay();
-       paravirt_ops.io_delay();
-       paravirt_ops.io_delay();
+       pv_cpu_ops.io_delay();
+       pv_cpu_ops.io_delay();
+       pv_cpu_ops.io_delay();
 #endif
 }
 
@@ -655,121 +692,120 @@ static inline void slow_down_io(void) {
  */
 static inline void apic_write(unsigned long reg, unsigned long v)
 {
-       PVOP_VCALL2(apic_write, reg, v);
+       PVOP_VCALL2(pv_apic_ops.apic_write, reg, v);
 }
 
 static inline void apic_write_atomic(unsigned long reg, unsigned long v)
 {
-       PVOP_VCALL2(apic_write_atomic, reg, v);
+       PVOP_VCALL2(pv_apic_ops.apic_write_atomic, reg, v);
 }
 
 static inline unsigned long apic_read(unsigned long reg)
 {
-       return PVOP_CALL1(unsigned long, apic_read, reg);
+       return PVOP_CALL1(unsigned long, pv_apic_ops.apic_read, reg);
 }
 
 static inline void setup_boot_clock(void)
 {
-       PVOP_VCALL0(setup_boot_clock);
+       PVOP_VCALL0(pv_apic_ops.setup_boot_clock);
 }
 
 static inline void setup_secondary_clock(void)
 {
-       PVOP_VCALL0(setup_secondary_clock);
+       PVOP_VCALL0(pv_apic_ops.setup_secondary_clock);
 }
 #endif
 
 static inline void paravirt_post_allocator_init(void)
 {
-       if (paravirt_ops.post_allocator_init)
-               (*paravirt_ops.post_allocator_init)();
+       if (pv_init_ops.post_allocator_init)
+               (*pv_init_ops.post_allocator_init)();
 }
 
 static inline void paravirt_pagetable_setup_start(pgd_t *base)
 {
-       if (paravirt_ops.pagetable_setup_start)
-               (*paravirt_ops.pagetable_setup_start)(base);
+       (*pv_mmu_ops.pagetable_setup_start)(base);
 }
 
 static inline void paravirt_pagetable_setup_done(pgd_t *base)
 {
-       if (paravirt_ops.pagetable_setup_done)
-               (*paravirt_ops.pagetable_setup_done)(base);
+       (*pv_mmu_ops.pagetable_setup_done)(base);
 }
 
 #ifdef CONFIG_SMP
 static inline void startup_ipi_hook(int phys_apicid, unsigned long start_eip,
                                    unsigned long start_esp)
 {
-       PVOP_VCALL3(startup_ipi_hook, phys_apicid, start_eip, start_esp);
+       PVOP_VCALL3(pv_apic_ops.startup_ipi_hook,
+                   phys_apicid, start_eip, start_esp);
 }
 #endif
 
 static inline void paravirt_activate_mm(struct mm_struct *prev,
                                        struct mm_struct *next)
 {
-       PVOP_VCALL2(activate_mm, prev, next);
+       PVOP_VCALL2(pv_mmu_ops.activate_mm, prev, next);
 }
 
 static inline void arch_dup_mmap(struct mm_struct *oldmm,
                                 struct mm_struct *mm)
 {
-       PVOP_VCALL2(dup_mmap, oldmm, mm);
+       PVOP_VCALL2(pv_mmu_ops.dup_mmap, oldmm, mm);
 }
 
 static inline void arch_exit_mmap(struct mm_struct *mm)
 {
-       PVOP_VCALL1(exit_mmap, mm);
+       PVOP_VCALL1(pv_mmu_ops.exit_mmap, mm);
 }
 
 static inline void __flush_tlb(void)
 {
-       PVOP_VCALL0(flush_tlb_user);
+       PVOP_VCALL0(pv_mmu_ops.flush_tlb_user);
 }
 static inline void __flush_tlb_global(void)
 {
-       PVOP_VCALL0(flush_tlb_kernel);
+       PVOP_VCALL0(pv_mmu_ops.flush_tlb_kernel);
 }
 static inline void __flush_tlb_single(unsigned long addr)
 {
-       PVOP_VCALL1(flush_tlb_single, addr);
+       PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr);
 }
 
 static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
                                    unsigned long va)
 {
-       PVOP_VCALL3(flush_tlb_others, &cpumask, mm, va);
+       PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, &cpumask, mm, va);
 }
 
 static inline void paravirt_alloc_pt(struct mm_struct *mm, unsigned pfn)
 {
-       PVOP_VCALL2(alloc_pt, mm, pfn);
+       PVOP_VCALL2(pv_mmu_ops.alloc_pt, mm, pfn);
 }
 static inline void paravirt_release_pt(unsigned pfn)
 {
-       PVOP_VCALL1(release_pt, pfn);
+       PVOP_VCALL1(pv_mmu_ops.release_pt, pfn);
 }
 
 static inline void paravirt_alloc_pd(unsigned pfn)
 {
-       PVOP_VCALL1(alloc_pd, pfn);
+       PVOP_VCALL1(pv_mmu_ops.alloc_pd, pfn);
 }
 
 static inline void paravirt_alloc_pd_clone(unsigned pfn, unsigned clonepfn,
                                           unsigned start, unsigned count)
 {
-       PVOP_VCALL4(alloc_pd_clone, pfn, clonepfn, start, count);
+       PVOP_VCALL4(pv_mmu_ops.alloc_pd_clone, pfn, clonepfn, start, count);
 }
 static inline void paravirt_release_pd(unsigned pfn)
 {
-       PVOP_VCALL1(release_pd, pfn);
+       PVOP_VCALL1(pv_mmu_ops.release_pd, pfn);
 }
 
 #ifdef CONFIG_HIGHPTE
 static inline void *kmap_atomic_pte(struct page *page, enum km_type type)
 {
        unsigned long ret;
-       ret = PVOP_CALL2(unsigned long, kmap_atomic_pte, page, type);
+       ret = PVOP_CALL2(unsigned long, pv_mmu_ops.kmap_atomic_pte, page, type);
        return (void *)ret;
 }
 #endif
@@ -777,162 +813,191 @@ static inline void *kmap_atomic_pte(struct page *page, enum km_type type)
 static inline void pte_update(struct mm_struct *mm, unsigned long addr,
                              pte_t *ptep)
 {
-       PVOP_VCALL3(pte_update, mm, addr, ptep);
+       PVOP_VCALL3(pv_mmu_ops.pte_update, mm, addr, ptep);
 }
 
 static inline void pte_update_defer(struct mm_struct *mm, unsigned long addr,
                                    pte_t *ptep)
 {
-       PVOP_VCALL3(pte_update_defer, mm, addr, ptep);
+       PVOP_VCALL3(pv_mmu_ops.pte_update_defer, mm, addr, ptep);
 }
 
 #ifdef CONFIG_X86_PAE
 static inline pte_t __pte(unsigned long long val)
 {
-       unsigned long long ret = PVOP_CALL2(unsigned long long, make_pte,
+       unsigned long long ret = PVOP_CALL2(unsigned long long,
+                                           pv_mmu_ops.make_pte,
                                            val, val >> 32);
        return (pte_t) { ret, ret >> 32 };
 }
 
 static inline pmd_t __pmd(unsigned long long val)
 {
-       return (pmd_t) { PVOP_CALL2(unsigned long long, make_pmd, val, val >> 32) };
+       return (pmd_t) { PVOP_CALL2(unsigned long long, pv_mmu_ops.make_pmd,
+                                   val, val >> 32) };
 }
 
 static inline pgd_t __pgd(unsigned long long val)
 {
-       return (pgd_t) { PVOP_CALL2(unsigned long long, make_pgd, val, val >> 32) };
+       return (pgd_t) { PVOP_CALL2(unsigned long long, pv_mmu_ops.make_pgd,
+                                   val, val >> 32) };
 }
 
 static inline unsigned long long pte_val(pte_t x)
 {
-       return PVOP_CALL2(unsigned long long, pte_val, x.pte_low, x.pte_high);
+       return PVOP_CALL2(unsigned long long, pv_mmu_ops.pte_val,
+                         x.pte_low, x.pte_high);
 }
 
 static inline unsigned long long pmd_val(pmd_t x)
 {
-       return PVOP_CALL2(unsigned long long, pmd_val, x.pmd, x.pmd >> 32);
+       return PVOP_CALL2(unsigned long long, pv_mmu_ops.pmd_val,
+                         x.pmd, x.pmd >> 32);
 }
 
 static inline unsigned long long pgd_val(pgd_t x)
 {
-       return PVOP_CALL2(unsigned long long, pgd_val, x.pgd, x.pgd >> 32);
+       return PVOP_CALL2(unsigned long long, pv_mmu_ops.pgd_val,
+                         x.pgd, x.pgd >> 32);
 }
 
 static inline void set_pte(pte_t *ptep, pte_t pteval)
 {
-       PVOP_VCALL3(set_pte, ptep, pteval.pte_low, pteval.pte_high);
+       PVOP_VCALL3(pv_mmu_ops.set_pte, ptep, pteval.pte_low, pteval.pte_high);
 }
 
 static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
                              pte_t *ptep, pte_t pteval)
 {
        /* 5 arg words */
-       paravirt_ops.set_pte_at(mm, addr, ptep, pteval);
+       pv_mmu_ops.set_pte_at(mm, addr, ptep, pteval);
 }
 
 static inline void set_pte_atomic(pte_t *ptep, pte_t pteval)
 {
-       PVOP_VCALL3(set_pte_atomic, ptep, pteval.pte_low, pteval.pte_high);
+       PVOP_VCALL3(pv_mmu_ops.set_pte_atomic, ptep,
+                   pteval.pte_low, pteval.pte_high);
 }
 
 static inline void set_pte_present(struct mm_struct *mm, unsigned long addr,
                                   pte_t *ptep, pte_t pte)
 {
        /* 5 arg words */
-       paravirt_ops.set_pte_present(mm, addr, ptep, pte);
+       pv_mmu_ops.set_pte_present(mm, addr, ptep, pte);
 }
 
 static inline void set_pmd(pmd_t *pmdp, pmd_t pmdval)
 {
-       PVOP_VCALL3(set_pmd, pmdp, pmdval.pmd, pmdval.pmd >> 32);
+       PVOP_VCALL3(pv_mmu_ops.set_pmd, pmdp,
+                   pmdval.pmd, pmdval.pmd >> 32);
 }
 
 static inline void set_pud(pud_t *pudp, pud_t pudval)
 {
-       PVOP_VCALL3(set_pud, pudp, pudval.pgd.pgd, pudval.pgd.pgd >> 32);
+       PVOP_VCALL3(pv_mmu_ops.set_pud, pudp,
+                   pudval.pgd.pgd, pudval.pgd.pgd >> 32);
 }
 
 static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
-       PVOP_VCALL3(pte_clear, mm, addr, ptep);
+       PVOP_VCALL3(pv_mmu_ops.pte_clear, mm, addr, ptep);
 }
 
 static inline void pmd_clear(pmd_t *pmdp)
 {
-       PVOP_VCALL1(pmd_clear, pmdp);
+       PVOP_VCALL1(pv_mmu_ops.pmd_clear, pmdp);
 }
 
 #else  /* !CONFIG_X86_PAE */
 
 static inline pte_t __pte(unsigned long val)
 {
-       return (pte_t) { PVOP_CALL1(unsigned long, make_pte, val) };
+       return (pte_t) { PVOP_CALL1(unsigned long, pv_mmu_ops.make_pte, val) };
 }
 
 static inline pgd_t __pgd(unsigned long val)
 {
-       return (pgd_t) { PVOP_CALL1(unsigned long, make_pgd, val) };
+       return (pgd_t) { PVOP_CALL1(unsigned long, pv_mmu_ops.make_pgd, val) };
 }
 
 static inline unsigned long pte_val(pte_t x)
 {
-       return PVOP_CALL1(unsigned long, pte_val, x.pte_low);
+       return PVOP_CALL1(unsigned long, pv_mmu_ops.pte_val, x.pte_low);
 }
 
 static inline unsigned long pgd_val(pgd_t x)
 {
-       return PVOP_CALL1(unsigned long, pgd_val, x.pgd);
+       return PVOP_CALL1(unsigned long, pv_mmu_ops.pgd_val, x.pgd);
 }
 
 static inline void set_pte(pte_t *ptep, pte_t pteval)
 {
-       PVOP_VCALL2(set_pte, ptep, pteval.pte_low);
+       PVOP_VCALL2(pv_mmu_ops.set_pte, ptep, pteval.pte_low);
 }
 
 static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
                              pte_t *ptep, pte_t pteval)
 {
-       PVOP_VCALL4(set_pte_at, mm, addr, ptep, pteval.pte_low);
+       PVOP_VCALL4(pv_mmu_ops.set_pte_at, mm, addr, ptep, pteval.pte_low);
 }
 
 static inline void set_pmd(pmd_t *pmdp, pmd_t pmdval)
 {
-       PVOP_VCALL2(set_pmd, pmdp, pmdval.pud.pgd.pgd);
+       PVOP_VCALL2(pv_mmu_ops.set_pmd, pmdp, pmdval.pud.pgd.pgd);
 }
 #endif /* CONFIG_X86_PAE */
 
+/* Lazy mode for batching updates / context switch */
+enum paravirt_lazy_mode {
+       PARAVIRT_LAZY_NONE,
+       PARAVIRT_LAZY_MMU,
+       PARAVIRT_LAZY_CPU,
+};
+
+enum paravirt_lazy_mode paravirt_get_lazy_mode(void);
+void paravirt_enter_lazy_cpu(void);
+void paravirt_leave_lazy_cpu(void);
+void paravirt_enter_lazy_mmu(void);
+void paravirt_leave_lazy_mmu(void);
+void paravirt_leave_lazy(enum paravirt_lazy_mode mode);
+
 #define  __HAVE_ARCH_ENTER_LAZY_CPU_MODE
 static inline void arch_enter_lazy_cpu_mode(void)
 {
-       PVOP_VCALL1(set_lazy_mode, PARAVIRT_LAZY_CPU);
+       PVOP_VCALL0(pv_cpu_ops.lazy_mode.enter);
 }
 
 static inline void arch_leave_lazy_cpu_mode(void)
 {
-       PVOP_VCALL1(set_lazy_mode, PARAVIRT_LAZY_NONE);
+       PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave);
 }
 
 static inline void arch_flush_lazy_cpu_mode(void)
 {
-       PVOP_VCALL1(set_lazy_mode, PARAVIRT_LAZY_FLUSH);
+       if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU)) {
+               arch_leave_lazy_cpu_mode();
+               arch_enter_lazy_cpu_mode();
+       }
 }
 
 
 #define  __HAVE_ARCH_ENTER_LAZY_MMU_MODE
 static inline void arch_enter_lazy_mmu_mode(void)
 {
-       PVOP_VCALL1(set_lazy_mode, PARAVIRT_LAZY_MMU);
+       PVOP_VCALL0(pv_mmu_ops.lazy_mode.enter);
 }
 
 static inline void arch_leave_lazy_mmu_mode(void)
 {
-       PVOP_VCALL1(set_lazy_mode, PARAVIRT_LAZY_NONE);
+       PVOP_VCALL0(pv_mmu_ops.lazy_mode.leave);
 }
 
 static inline void arch_flush_lazy_mmu_mode(void)
 {
-       PVOP_VCALL1(set_lazy_mode, PARAVIRT_LAZY_FLUSH);
+       if (unlikely(paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU)) {
+               arch_leave_lazy_mmu_mode();
+               arch_enter_lazy_mmu_mode();
+       }
 }
 
 void _paravirt_nop(void);
@@ -957,7 +1022,7 @@ static inline unsigned long __raw_local_save_flags(void)
                                  PARAVIRT_CALL
                                  "popl %%edx; popl %%ecx")
                     : "=a"(f)
-                    : paravirt_type(save_fl),
+                    : paravirt_type(pv_irq_ops.save_fl),
                       paravirt_clobber(CLBR_EAX)
                     : "memory", "cc");
        return f;
@@ -970,7 +1035,7 @@ static inline void raw_local_irq_restore(unsigned long f)
                                  "popl %%edx; popl %%ecx")
                     : "=a"(f)
                     : "0"(f),
-                      paravirt_type(restore_fl),
+                      paravirt_type(pv_irq_ops.restore_fl),
                       paravirt_clobber(CLBR_EAX)
                     : "memory", "cc");
 }
@@ -981,7 +1046,7 @@ static inline void raw_local_irq_disable(void)
                                  PARAVIRT_CALL
                                  "popl %%edx; popl %%ecx")
                     :
-                    : paravirt_type(irq_disable),
+                    : paravirt_type(pv_irq_ops.irq_disable),
                       paravirt_clobber(CLBR_EAX)
                     : "memory", "eax", "cc");
 }
@@ -992,7 +1057,7 @@ static inline void raw_local_irq_enable(void)
                                  PARAVIRT_CALL
                                  "popl %%edx; popl %%ecx")
                     :
-                    : paravirt_type(irq_enable),
+                    : paravirt_type(pv_irq_ops.irq_enable),
                       paravirt_clobber(CLBR_EAX)
                     : "memory", "eax", "cc");
 }
@@ -1008,21 +1073,23 @@ static inline unsigned long __raw_local_irq_save(void)
 
 #define CLI_STRING                                                     \
        _paravirt_alt("pushl %%ecx; pushl %%edx;"                       \
-                     "call *paravirt_ops+%c[paravirt_cli_type]*4;"     \
+                     "call *%[paravirt_cli_opptr];"                    \
                      "popl %%edx; popl %%ecx",                         \
                      "%c[paravirt_cli_type]", "%c[paravirt_clobber]")
 
 #define STI_STRING                                                     \
        _paravirt_alt("pushl %%ecx; pushl %%edx;"                       \
-                     "call *paravirt_ops+%c[paravirt_sti_type]*4;"     \
+                     "call *%[paravirt_sti_opptr];"                    \
                      "popl %%edx; popl %%ecx",                         \
                      "%c[paravirt_sti_type]", "%c[paravirt_clobber]")
 
 #define CLI_STI_CLOBBERS , "%eax"
 #define CLI_STI_INPUT_ARGS                                             \
        ,                                                               \
-       [paravirt_cli_type] "i" (PARAVIRT_PATCH(irq_disable)),          \
-       [paravirt_sti_type] "i" (PARAVIRT_PATCH(irq_enable)),           \
+       [paravirt_cli_type] "i" (PARAVIRT_PATCH(pv_irq_ops.irq_disable)),               \
+       [paravirt_cli_opptr] "m" (pv_irq_ops.irq_disable),              \
+       [paravirt_sti_type] "i" (PARAVIRT_PATCH(pv_irq_ops.irq_enable)),                \
+       [paravirt_sti_opptr] "m" (pv_irq_ops.irq_enable),               \
        paravirt_clobber(CLBR_EAX)
 
 /* Make sure as little as possible of this mess escapes. */
@@ -1042,7 +1109,7 @@ static inline unsigned long __raw_local_irq_save(void)
 
 #else  /* __ASSEMBLY__ */
 
-#define PARA_PATCH(off)        ((off) / 4)
+#define PARA_PATCH(struct, off)        ((PARAVIRT_PATCH_##struct + (off)) / 4)
 
 #define PARA_SITE(ptype, clobbers, ops)                \
 771:;                                          \
@@ -1055,29 +1122,29 @@ static inline unsigned long __raw_local_irq_save(void)
         .short clobbers;                       \
        .popsection
 
-#define INTERRUPT_RETURN                                       \
-       PARA_SITE(PARA_PATCH(PARAVIRT_iret), CLBR_NONE,         \
-                 jmp *%cs:paravirt_ops+PARAVIRT_iret)
+#define INTERRUPT_RETURN                                               \
+       PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE,       \
+                 jmp *%cs:pv_cpu_ops+PV_CPU_iret)
 
 #define DISABLE_INTERRUPTS(clobbers)                                   \
-       PARA_SITE(PARA_PATCH(PARAVIRT_irq_disable), clobbers,           \
+       PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \
                  pushl %eax; pushl %ecx; pushl %edx;                   \
-                 call *%cs:paravirt_ops+PARAVIRT_irq_disable;          \
+                 call *%cs:pv_irq_ops+PV_IRQ_irq_disable;              \
                  popl %edx; popl %ecx; popl %eax)                      \
 
 #define ENABLE_INTERRUPTS(clobbers)                                    \
-       PARA_SITE(PARA_PATCH(PARAVIRT_irq_enable), clobbers,            \
+       PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers,  \
                  pushl %eax; pushl %ecx; pushl %edx;                   \
-                 call *%cs:paravirt_ops+PARAVIRT_irq_enable;           \
+                 call *%cs:pv_irq_ops+PV_IRQ_irq_enable;               \
                  popl %edx; popl %ecx; popl %eax)
 
-#define ENABLE_INTERRUPTS_SYSEXIT                                      \
-       PARA_SITE(PARA_PATCH(PARAVIRT_irq_enable_sysexit), CLBR_NONE,   \
-                 jmp *%cs:paravirt_ops+PARAVIRT_irq_enable_sysexit)
+#define ENABLE_INTERRUPTS_SYSEXIT                                             \
+       PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), CLBR_NONE,\
+                 jmp *%cs:pv_cpu_ops+PV_CPU_irq_enable_sysexit)
 
 #define GET_CR0_INTO_EAX                       \
        push %ecx; push %edx;                   \
-       call *paravirt_ops+PARAVIRT_read_cr0;   \
+       call *pv_cpu_ops+PV_CPU_read_cr0;       \
        pop %edx; pop %ecx
 
 #endif /* __ASSEMBLY__ */
index c0df89f..448ac95 100644 (file)
@@ -2,7 +2,7 @@
 #define _I386_PGTABLE_3LEVEL_DEFS_H
 
 #ifdef CONFIG_PARAVIRT
-#define SHARED_KERNEL_PMD      (paravirt_ops.shared_kernel_pmd)
+#define SHARED_KERNEL_PMD      (pv_info.shared_kernel_pmd)
 #else
 #define SHARED_KERNEL_PMD      1
 #endif
index ff61ea3..b05d8a6 100644 (file)
@@ -160,8 +160,9 @@ struct vcpu_set_singleshot_timer {
  */
 #define VCPUOP_register_vcpu_info   10  /* arg == struct vcpu_info */
 struct vcpu_register_vcpu_info {
-    uint32_t mfn;               /* mfn of page to place vcpu_info */
-    uint32_t offset;            /* offset within page */
+    uint64_t mfn;    /* mfn of page to place vcpu_info */
+    uint32_t offset; /* offset within page */
+    uint32_t rsvd;   /* unused */
 };
 
 #endif /* __XEN_PUBLIC_VCPU_H__ */
index 1cc6cad..b1f03b0 100644 (file)
@@ -155,7 +155,6 @@ config SPLIT_PTLOCK_CPUS
        int
        default "4096" if ARM && !CPU_CACHE_VIPT
        default "4096" if PARISC && !PA20
-       default "4096" if XEN
        default "4"
 
 #