KVM: PPC: Move fields between struct kvm_vcpu_arch and kvmppc_vcpu_book3s

diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 6416f227d34567c8737c39d37c3cfa201c0e1ace..5d0babefe9137d0552cc949f9dfa3e8bc6ec6b12 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -16,6 +16,7 @@
 
 #include <linux/kvm_host.h>
 #include <linux/err.h>
+#include <linux/slab.h>
 
 #include <asm/reg.h>
 #include <asm/cputable.h>
 #include <asm/kvm_ppc.h>
 #include <asm/kvm_book3s.h>
 #include <asm/mmu_context.h>
+#include <asm/page.h>
 #include <linux/gfp.h>
 #include <linux/sched.h>
 #include <linux/vmalloc.h>
+#include <linux/highmem.h>
+
+#include "trace.h"
 
 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
 
 /* #define EXIT_DEBUG */
-/* #define EXIT_DEBUG_SIMPLE */
 /* #define DEBUG_EXT */
 
-static void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr);
+static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
+                            ulong msr);
+
+/* Some compatibility defines */
+#ifdef CONFIG_PPC_BOOK3S_32
+#define MSR_USER32 MSR_USER
+#define MSR_USER64 MSR_USER
+#define HW_PAGE_SIZE PAGE_SIZE
+#endif
 
 struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "exits",       VCPU_STAT(sum_exits) },
@@ -69,80 +81,98 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
 
 void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
-       memcpy(get_paca()->kvm_slb, to_book3s(vcpu)->slb_shadow, sizeof(get_paca()->kvm_slb));
-       memcpy(&get_paca()->shadow_vcpu, &to_book3s(vcpu)->shadow_vcpu,
+#ifdef CONFIG_PPC_BOOK3S_64
+       memcpy(to_svcpu(vcpu)->slb, to_book3s(vcpu)->slb_shadow, sizeof(to_svcpu(vcpu)->slb));
+       memcpy(&get_paca()->shadow_vcpu, to_book3s(vcpu)->shadow_vcpu,
               sizeof(get_paca()->shadow_vcpu));
-       get_paca()->kvm_slb_max = to_book3s(vcpu)->slb_shadow_max;
+       to_svcpu(vcpu)->slb_max = to_book3s(vcpu)->slb_shadow_max;
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_32
+       current->thread.kvm_shadow_vcpu = to_book3s(vcpu)->shadow_vcpu;
+#endif
 }
 
 void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
 {
-       memcpy(to_book3s(vcpu)->slb_shadow, get_paca()->kvm_slb, sizeof(get_paca()->kvm_slb));
-       memcpy(&to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu,
+#ifdef CONFIG_PPC_BOOK3S_64
+       memcpy(to_book3s(vcpu)->slb_shadow, to_svcpu(vcpu)->slb, sizeof(to_svcpu(vcpu)->slb));
+       memcpy(to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu,
               sizeof(get_paca()->shadow_vcpu));
-       to_book3s(vcpu)->slb_shadow_max = get_paca()->kvm_slb_max;
+       to_book3s(vcpu)->slb_shadow_max = to_svcpu(vcpu)->slb_max;
+#endif
 
        kvmppc_giveup_ext(vcpu, MSR_FP);
        kvmppc_giveup_ext(vcpu, MSR_VEC);
        kvmppc_giveup_ext(vcpu, MSR_VSX);
 }
 
-#if defined(EXIT_DEBUG)
-static u32 kvmppc_get_dec(struct kvm_vcpu *vcpu)
-{
-       u64 jd = mftb() - vcpu->arch.dec_jiffies;
-       return vcpu->arch.dec - jd;
-}
-#endif
-
 static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
 {
-       vcpu->arch.shadow_msr = vcpu->arch.msr;
+       ulong smsr = vcpu->arch.shared->msr;
+
        /* Guest MSR values */
-       vcpu->arch.shadow_msr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE |
-                                MSR_BE | MSR_DE;
+       smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_DE;
        /* Process MSR values */
-       vcpu->arch.shadow_msr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR |
-                                MSR_EE;
+       smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE;
        /* External providers the guest reserved */
-       vcpu->arch.shadow_msr |= (vcpu->arch.msr & vcpu->arch.guest_owned_ext);
+       smsr |= (vcpu->arch.shared->msr & vcpu->arch.guest_owned_ext);
        /* 64-bit Process MSR values */
 #ifdef CONFIG_PPC_BOOK3S_64
-       vcpu->arch.shadow_msr |= MSR_ISF | MSR_HV;
+       smsr |= MSR_ISF | MSR_HV;
 #endif
+       vcpu->arch.shadow_msr = smsr;
 }
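
The recalculation above builds the shadow MSR (the value the CPU actually runs with) from three sources: guest-selectable bits, bits the host always forces on, and external-provider bits the guest currently owns. A worked example with illustrative values, not taken from the patch:

        /*
         * Example (illustrative): guest msr = MSR_SE | MSR_FP,
         * guest_owned_ext = MSR_FP.
         *   smsr  = msr & (FE0|FE1|SF|SE|BE|DE)   -> keeps MSR_SE
         *   smsr |= ME|RI|IR|DR|PR|EE             -> host-mandated bits
         *   smsr |= msr & guest_owned_ext         -> passes MSR_FP through
         * The guest then runs single-stepped with the FPU enabled, but
         * always in problem state with translation turned on.
         */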
 
 void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
 {
-       ulong old_msr = vcpu->arch.msr;
+       ulong old_msr = vcpu->arch.shared->msr;
 
 #ifdef EXIT_DEBUG
        printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr);
 #endif
 
        msr &= to_book3s(vcpu)->msr_mask;
-       vcpu->arch.msr = msr;
+       vcpu->arch.shared->msr = msr;
        kvmppc_recalc_shadow_msr(vcpu);
 
-       if (msr & (MSR_WE|MSR_POW)) {
+       if (msr & MSR_POW) {
                if (!vcpu->arch.pending_exceptions) {
                        kvm_vcpu_block(vcpu);
                        vcpu->stat.halt_wakeup++;
+
+                       /* Unset POW bit after we woke up */
+                       msr &= ~MSR_POW;
+                       vcpu->arch.shared->msr = msr;
                }
        }
 
-       if (((vcpu->arch.msr & (MSR_IR|MSR_DR)) != (old_msr & (MSR_IR|MSR_DR))) ||
-           (vcpu->arch.msr & MSR_PR) != (old_msr & MSR_PR)) {
+       if ((vcpu->arch.shared->msr & (MSR_PR|MSR_IR|MSR_DR)) !=
+                  (old_msr & (MSR_PR|MSR_IR|MSR_DR))) {
                kvmppc_mmu_flush_segments(vcpu);
-               kvmppc_mmu_map_segment(vcpu, vcpu->arch.pc);
+               kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
+
+               /* Preload magic page segment when in kernel mode */
+               if (!(msr & MSR_PR) && vcpu->arch.magic_page_pa) {
+                       struct kvm_vcpu_arch *a = &vcpu->arch;
+
+                       if (msr & MSR_DR)
+                               kvmppc_mmu_map_segment(vcpu, a->magic_page_ea);
+                       else
+                               kvmppc_mmu_map_segment(vcpu, a->magic_page_pa);
+               }
        }
+
+       /* Preload FPU if it's enabled */
+       if (vcpu->arch.shared->msr & MSR_FP)
+               kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
 }
 
 void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags)
 {
-       vcpu->arch.srr0 = vcpu->arch.pc;
-       vcpu->arch.srr1 = vcpu->arch.msr | flags;
-       vcpu->arch.pc = to_book3s(vcpu)->hior + vec;
+       vcpu->arch.shared->srr0 = kvmppc_get_pc(vcpu);
+       vcpu->arch.shared->srr1 = vcpu->arch.shared->msr | flags;
+       kvmppc_set_pc(vcpu, to_book3s(vcpu)->hior + vec);
        vcpu->arch.mmu.reset_msr(vcpu);
 }
 
@@ -158,6 +188,7 @@ static int kvmppc_book3s_vec2irqprio(unsigned int vec)
        case 0x400: prio = BOOK3S_IRQPRIO_INST_STORAGE;         break;
        case 0x480: prio = BOOK3S_IRQPRIO_INST_SEGMENT;         break;
        case 0x500: prio = BOOK3S_IRQPRIO_EXTERNAL;             break;
+       case 0x501: prio = BOOK3S_IRQPRIO_EXTERNAL_LEVEL;       break;
        case 0x600: prio = BOOK3S_IRQPRIO_ALIGNMENT;            break;
        case 0x700: prio = BOOK3S_IRQPRIO_PROGRAM;              break;
        case 0x800: prio = BOOK3S_IRQPRIO_FP_UNAVAIL;           break;
@@ -177,6 +208,9 @@ static void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
 {
        clear_bit(kvmppc_book3s_vec2irqprio(vec),
                  &vcpu->arch.pending_exceptions);
+
+       if (!vcpu->arch.pending_exceptions)
+               vcpu->arch.shared->int_pending = 0;
 }
 
 void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec)
@@ -204,7 +238,7 @@ void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
 
 int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu)
 {
-       return test_bit(BOOK3S_INTERRUPT_DECREMENTER >> 7, &vcpu->arch.pending_exceptions);
+       return test_bit(BOOK3S_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions);
 }
 
 void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu)
@@ -215,7 +249,19 @@ void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu)
 void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
                                 struct kvm_interrupt *irq)
 {
-       kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
+       unsigned int vec = BOOK3S_INTERRUPT_EXTERNAL;
+
+       if (irq->irq == KVM_INTERRUPT_SET_LEVEL)
+               vec = BOOK3S_INTERRUPT_EXTERNAL_LEVEL;
+
+       kvmppc_book3s_queue_irqprio(vcpu, vec);
+}
+
+void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
+                                  struct kvm_interrupt *irq)
+{
+       kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
+       kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
 }
 
 int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
@@ -223,14 +269,29 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
        int deliver = 1;
        int vec = 0;
        ulong flags = 0ULL;
+       ulong crit_raw = vcpu->arch.shared->critical;
+       ulong crit_r1 = kvmppc_get_gpr(vcpu, 1);
+       bool crit;
+
+       /* Truncate crit indicators in 32 bit mode */
+       if (!(vcpu->arch.shared->msr & MSR_SF)) {
+               crit_raw &= 0xffffffff;
+               crit_r1 &= 0xffffffff;
+       }
+
+       /* Critical section when crit == r1 */
+       crit = (crit_raw == crit_r1);
+       /* ... and we're in supervisor mode */
+       crit = crit && !(vcpu->arch.shared->msr & MSR_PR);
 
        switch (priority) {
        case BOOK3S_IRQPRIO_DECREMENTER:
-               deliver = vcpu->arch.msr & MSR_EE;
+               deliver = (vcpu->arch.shared->msr & MSR_EE) && !crit;
                vec = BOOK3S_INTERRUPT_DECREMENTER;
                break;
        case BOOK3S_IRQPRIO_EXTERNAL:
-               deliver = vcpu->arch.msr & MSR_EE;
+       case BOOK3S_IRQPRIO_EXTERNAL_LEVEL:
+               deliver = (vcpu->arch.shared->msr & MSR_EE) && !crit;
                vec = BOOK3S_INTERRUPT_EXTERNAL;
                break;
        case BOOK3S_IRQPRIO_SYSTEM_RESET:
@@ -292,9 +353,27 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
        return deliver;
 }
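
The critical-section test introduced above gates asynchronous delivery on a paravirtual convention: the guest stores its r1 into shared->critical while inside a critical section. A minimal standalone sketch of the same test (the helper name is assumed, not part of the patch):

        static bool kvmppc_vcpu_in_crit(struct kvm_vcpu *vcpu)
        {
                ulong crit_raw = vcpu->arch.shared->critical;
                ulong crit_r1 = kvmppc_get_gpr(vcpu, 1);

                /* 32-bit guests only compare the low word */
                if (!(vcpu->arch.shared->msr & MSR_SF)) {
                        crit_raw &= 0xffffffff;
                        crit_r1 &= 0xffffffff;
                }

                /* critical iff the cookie matches r1 in supervisor mode */
                return (crit_raw == crit_r1) &&
                       !(vcpu->arch.shared->msr & MSR_PR);
        }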
 
+/*
+ * This function determines if an irqprio should be cleared once issued.
+ */
+static bool clear_irqprio(struct kvm_vcpu *vcpu, unsigned int priority)
+{
+       switch (priority) {
+               case BOOK3S_IRQPRIO_DECREMENTER:
+                       /* DEC interrupts get cleared by mtdec */
+                       return false;
+               case BOOK3S_IRQPRIO_EXTERNAL_LEVEL:
+                       /* External interrupts get cleared by userspace */
+                       return false;
+       }
+
+       return true;
+}
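
Note the level-versus-edge split this encodes: the decrementer priority stays pending until the guest reprograms the decrementer with mtdec, and a KVM_INTERRUPT_SET_LEVEL external stays pending until userspace withdraws the line (which now lands in kvmppc_core_dequeue_external() above); every other priority is edge-triggered and cleared as soon as it is delivered.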
+
 void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
 {
        unsigned long *pending = &vcpu->arch.pending_exceptions;
+       unsigned long old_pending = vcpu->arch.pending_exceptions;
        unsigned int priority;
 
 #ifdef EXIT_DEBUG
@@ -302,10 +381,9 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
                printk(KERN_EMERG "KVM: Check pending: %lx\n", vcpu->arch.pending_exceptions);
 #endif
        priority = __ffs(*pending);
-       while (priority <= (sizeof(unsigned int) * 8)) {
+       while (priority < BOOK3S_IRQPRIO_MAX) {
                if (kvmppc_book3s_irqprio_deliver(vcpu, priority) &&
-                   (priority != BOOK3S_IRQPRIO_DECREMENTER)) {
-                       /* DEC interrupts get cleared by mtdec */
+                   clear_irqprio(vcpu, priority)) {
                        clear_bit(priority, &vcpu->arch.pending_exceptions);
                        break;
                }
@@ -314,17 +392,28 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
                                         BITS_PER_BYTE * sizeof(*pending),
                                         priority + 1);
        }
+
+       /* Tell the guest about our interrupt status */
+       if (*pending)
+               vcpu->arch.shared->int_pending = 1;
+       else if (old_pending)
+               vcpu->arch.shared->int_pending = 0;
 }
 
 void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
 {
+       u32 host_pvr;
+
        vcpu->arch.hflags &= ~BOOK3S_HFLAG_SLB;
        vcpu->arch.pvr = pvr;
+#ifdef CONFIG_PPC_BOOK3S_64
        if ((pvr >= 0x330000) && (pvr < 0x70330000)) {
                kvmppc_mmu_book3s_64_init(vcpu);
                to_book3s(vcpu)->hior = 0xfff00000;
                to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL;
-       } else {
+       } else
+#endif
+       {
                kvmppc_mmu_book3s_32_init(vcpu);
                to_book3s(vcpu)->hior = 0;
                to_book3s(vcpu)->msr_mask = 0xffffffffULL;
@@ -337,6 +426,51 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
            !strcmp(cur_cpu_spec->platform, "ppc970"))
                vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
 
+       /* Cell performs badly if MSR_FEx are set. So let's hope nobody
+          really needs them in a VM on Cell and force disable them. */
+       if (!strcmp(cur_cpu_spec->platform, "ppc-cell-be"))
+               to_book3s(vcpu)->msr_mask &= ~(MSR_FE0 | MSR_FE1);
+
+#ifdef CONFIG_PPC_BOOK3S_32
+       /* 32 bit Book3S always has 32 byte dcbz */
+       vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
+#endif
+
+       /* On some CPUs we can execute paired single operations natively */
+       asm ( "mfpvr %0" : "=r"(host_pvr));
+       switch (host_pvr) {
+       case 0x00080200:        /* lonestar 2.0 */
+       case 0x00088202:        /* lonestar 2.2 */
+       case 0x70000100:        /* gekko 1.0 */
+       case 0x00080100:        /* gekko 2.0 */
+       case 0x00083203:        /* gekko 2.3a */
+       case 0x00083213:        /* gekko 2.3b */
+       case 0x00083204:        /* gekko 2.4 */
+       case 0x00083214:        /* gekko 2.4e (8SE) - retail HW2 */
+       case 0x00087200:        /* broadway */
+               vcpu->arch.hflags |= BOOK3S_HFLAG_NATIVE_PS;
+               /* Enable HID2.PSE - in case we need it later */
+               mtspr(SPRN_HID2_GEKKO, mfspr(SPRN_HID2_GEKKO) | (1 << 29));
+       }
+}
+
+pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+       ulong mp_pa = vcpu->arch.magic_page_pa;
+
+       /* Magic page override */
+       if (unlikely(mp_pa) &&
+           unlikely(((gfn << PAGE_SHIFT) & KVM_PAM) ==
+                    ((mp_pa & PAGE_MASK) & KVM_PAM))) {
+               ulong shared_page = ((ulong)vcpu->arch.shared) & PAGE_MASK;
+               pfn_t pfn;
+
+               pfn = (pfn_t)virt_to_phys((void*)shared_page) >> PAGE_SHIFT;
+               get_page(pfn_to_page(pfn));
+               return pfn;
+       }
+
+       return gfn_to_pfn(vcpu->kvm, gfn);
 }
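
kvmppc_gfn_to_pfn() short-circuits exactly one guest frame: the magic page that aliases the shared page. An illustrative trace with assumed example values:

        /*
         * Assume magic_page_pa = 0xfffff000 and 4k pages. For gfn 0xfffff,
         * (gfn << PAGE_SHIFT) & KVM_PAM equals (mp_pa & PAGE_MASK) & KVM_PAM,
         * so the lookup returns the host pfn backing vcpu->arch.shared
         * instead of consulting the memslots; the extra get_page() keeps
         * the refcount balanced against the kvm_release_page_*() that the
         * caller will eventually issue.
         */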
 
 /* Book3s_32 CPUs always have 32 bytes cache line size, which Linux assumes. To
@@ -350,56 +484,45 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
  */
 static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
 {
-       bool touched = false;
-       hva_t hpage;
+       struct page *hpage;
+       u64 hpage_offset;
        u32 *page;
        int i;
 
-       hpage = gfn_to_hva(vcpu->kvm, pte->raddr >> PAGE_SHIFT);
-       if (kvm_is_error_hva(hpage))
+       hpage = gfn_to_page(vcpu->kvm, pte->raddr >> PAGE_SHIFT);
+       if (is_error_page(hpage)) {
+               kvm_release_page_clean(hpage);
                return;
+       }
 
-       hpage |= pte->raddr & ~PAGE_MASK;
-       hpage &= ~0xFFFULL;
-
-       page = vmalloc(HW_PAGE_SIZE);
-
-       if (copy_from_user(page, (void __user *)hpage, HW_PAGE_SIZE))
-               goto out;
+       hpage_offset = pte->raddr & ~PAGE_MASK;
+       hpage_offset &= ~0xFFFULL;
+       hpage_offset /= 4;
 
-       for (i=0; i < HW_PAGE_SIZE / 4; i++)
-               if ((page[i] & 0xff0007ff) == INS_DCBZ) {
-                       page[i] &= 0xfffffff7; // reserved instruction, so we trap
-                       touched = true;
-               }
+       get_page(hpage);
+       page = kmap_atomic(hpage, KM_USER0);
 
-       if (touched)
-               copy_to_user((void __user *)hpage, page, HW_PAGE_SIZE);
+       /* patch dcbz into reserved instruction, so we trap */
+       for (i=hpage_offset; i < hpage_offset + (HW_PAGE_SIZE / 4); i++)
+               if ((page[i] & 0xff0007ff) == INS_DCBZ)
+                       page[i] &= 0xfffffff7;
 
-out:
-       vfree(page);
+       kunmap_atomic(page, KM_USER0);
+       put_page(hpage);
 }
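
The patching loop keys on the opcode fields only. A self-contained sketch of the match (the is_dcbz() name is assumed; INS_DCBZ comes from the KVM headers):

        static inline bool is_dcbz(u32 insn)
        {
                /* 0xff0007ff keeps the primary and extended opcode fields
                 * and masks out the register operand fields, so every dcbz
                 * matches regardless of its rA/rB operands */
                return (insn & 0xff0007ff) == INS_DCBZ;
        }

Clearing bit 0x8 afterwards (page[i] &= 0xfffffff7) turns the matched word into a reserved encoding, so the guest traps into the emulator instead of zeroing a host-sized cache line.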
 
 static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data,
                         struct kvmppc_pte *pte)
 {
-       int relocated = (vcpu->arch.msr & (data ? MSR_DR : MSR_IR));
+       int relocated = (vcpu->arch.shared->msr & (data ? MSR_DR : MSR_IR));
        int r;
 
        if (relocated) {
                r = vcpu->arch.mmu.xlate(vcpu, eaddr, pte, data);
        } else {
                pte->eaddr = eaddr;
-               pte->raddr = eaddr & 0xffffffff;
-               pte->vpage = eaddr >> 12;
-               switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) {
-               case 0:
-                       pte->vpage |= VSID_REAL;
-               case MSR_DR:
-                       pte->vpage |= VSID_REAL_DR;
-               case MSR_IR:
-                       pte->vpage |= VSID_REAL_IR;
-               }
+               pte->raddr = eaddr & KVM_PAM;
+               pte->vpage = VSID_REAL | eaddr >> 12;
                pte->may_read = true;
                pte->may_write = true;
                pte->may_execute = true;
@@ -434,59 +557,66 @@ err:
        return kvmppc_bad_hva();
 }
 
-int kvmppc_st(struct kvm_vcpu *vcpu, ulong eaddr, int size, void *ptr)
+int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
+             bool data)
 {
        struct kvmppc_pte pte;
-       hva_t hva = eaddr;
 
        vcpu->stat.st++;
 
-       if (kvmppc_xlate(vcpu, eaddr, false, &pte))
-               goto err;
+       if (kvmppc_xlate(vcpu, *eaddr, data, &pte))
+               return -ENOENT;
 
-       hva = kvmppc_pte_to_hva(vcpu, &pte, false);
-       if (kvm_is_error_hva(hva))
-               goto err;
+       *eaddr = pte.raddr;
 
-       if (copy_to_user((void __user *)hva, ptr, size)) {
-               printk(KERN_INFO "kvmppc_st at 0x%lx failed\n", hva);
-               goto err;
-       }
+       if (!pte.may_write)
+               return -EPERM;
 
-       return 0;
+       if (kvm_write_guest(vcpu->kvm, pte.raddr, ptr, size))
+               return EMULATE_DO_MMIO;
 
-err:
-       return -ENOENT;
+       return EMULATE_DONE;
 }
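
With the new signature, kvmppc_st() mixes -errno translation failures with EMULATE_* results and hands the translated address back through *eaddr. A hypothetical caller, sketched under the assumption of a handle_mmio_store() helper that does not exist in the patch:

        static int store_u32(struct kvm_vcpu *vcpu, ulong addr, u32 val)
        {
                ulong ea = addr;
                int r = kvmppc_st(vcpu, &ea, sizeof(val), &val, true);

                switch (r) {
                case EMULATE_DONE:
                        return 0;
                case EMULATE_DO_MMIO:
                        /* ea now holds the guest-real address to emulate */
                        return handle_mmio_store(vcpu, ea, val); /* assumed */
                default:
                        /* -ENOENT or -EPERM: reflect a storage fault */
                        return r;
                }
        }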
 
-int kvmppc_ld(struct kvm_vcpu *vcpu, ulong eaddr, int size, void *ptr,
+int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
                      bool data)
 {
        struct kvmppc_pte pte;
-       hva_t hva = eaddr;
+       hva_t hva = *eaddr;
 
        vcpu->stat.ld++;
 
-       if (kvmppc_xlate(vcpu, eaddr, data, &pte))
-               goto err;
+       if (kvmppc_xlate(vcpu, *eaddr, data, &pte))
+               goto nopte;
+
+       *eaddr = pte.raddr;
 
        hva = kvmppc_pte_to_hva(vcpu, &pte, true);
        if (kvm_is_error_hva(hva))
-               goto err;
+               goto mmio;
 
        if (copy_from_user(ptr, (void __user *)hva, size)) {
                printk(KERN_INFO "kvmppc_ld at 0x%lx failed\n", hva);
-               goto err;
+               goto mmio;
        }
 
-       return 0;
+       return EMULATE_DONE;
 
-err:
+nopte:
        return -ENOENT;
+mmio:
+       return EMULATE_DO_MMIO;
 }
 
 static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
 {
+       ulong mp_pa = vcpu->arch.magic_page_pa;
+
+       if (unlikely(mp_pa) &&
+           unlikely((mp_pa & KVM_PAM) >> PAGE_SHIFT == gfn)) {
+               return 1;
+       }
+
        return kvm_is_visible_gfn(vcpu->kvm, gfn);
 }
 
@@ -499,12 +629,11 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
        int page_found = 0;
        struct kvmppc_pte pte;
        bool is_mmio = false;
+       bool dr = (vcpu->arch.shared->msr & MSR_DR) ? true : false;
+       bool ir = (vcpu->arch.shared->msr & MSR_IR) ? true : false;
+       u64 vsid;
 
-       if ( vec == BOOK3S_INTERRUPT_DATA_STORAGE ) {
-               relocated = (vcpu->arch.msr & MSR_DR);
-       } else {
-               relocated = (vcpu->arch.msr & MSR_IR);
-       }
+       relocated = data ? dr : ir;
 
        /* Resolve real address if translation turned on */
        if (relocated) {
@@ -513,17 +642,28 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
                pte.may_execute = true;
                pte.may_read = true;
                pte.may_write = true;
-               pte.raddr = eaddr & 0xffffffff;
+               pte.raddr = eaddr & KVM_PAM;
                pte.eaddr = eaddr;
                pte.vpage = eaddr >> 12;
-               switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) {
-               case 0:
-                       pte.vpage |= VSID_REAL;
-               case MSR_DR:
-                       pte.vpage |= VSID_REAL_DR;
-               case MSR_IR:
-                       pte.vpage |= VSID_REAL_IR;
-               }
+       }
+
+       switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) {
+       case 0:
+               pte.vpage |= ((u64)VSID_REAL << (SID_SHIFT - 12));
+               break;
+       case MSR_DR:
+       case MSR_IR:
+               vcpu->arch.mmu.esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid);
+
+               if ((vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) == MSR_DR)
+                       pte.vpage |= ((u64)VSID_REAL_DR << (SID_SHIFT - 12));
+               else
+                       pte.vpage |= ((u64)VSID_REAL_IR << (SID_SHIFT - 12));
+               pte.vpage |= vsid;
+
+               if (vsid == -1)
+                       page_found = -EINVAL;
+               break;
        }
 
        if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
@@ -538,20 +678,23 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
        if (page_found == -ENOENT) {
                /* Page not found in guest PTE entries */
-               vcpu->arch.dear = vcpu->arch.fault_dear;
-               to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr;
-               vcpu->arch.msr |= (vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL);
+               vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
+               vcpu->arch.shared->dsisr = to_svcpu(vcpu)->fault_dsisr;
+               vcpu->arch.shared->msr |=
+                       (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL);
                kvmppc_book3s_queue_irqprio(vcpu, vec);
        } else if (page_found == -EPERM) {
                /* Storage protection */
-               vcpu->arch.dear = vcpu->arch.fault_dear;
-               to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr & ~DSISR_NOHPTE;
-               to_book3s(vcpu)->dsisr |= DSISR_PROTFAULT;
-               vcpu->arch.msr |= (vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL);
+               vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
+               vcpu->arch.shared->dsisr =
+                       to_svcpu(vcpu)->fault_dsisr & ~DSISR_NOHPTE;
+               vcpu->arch.shared->dsisr |= DSISR_PROTFAULT;
+               vcpu->arch.shared->msr |=
+                       (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL);
                kvmppc_book3s_queue_irqprio(vcpu, vec);
        } else if (page_found == -EINVAL) {
                /* Page not found in guest SLB */
-               vcpu->arch.dear = vcpu->arch.fault_dear;
+               vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
                kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80);
        } else if (!is_mmio &&
                   kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) {
@@ -583,11 +726,13 @@ static inline int get_fpr_index(int i)
 }
 
 /* Give up external provider (FPU, Altivec, VSX) */
-static void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
+void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
 {
        struct thread_struct *t = &current->thread;
        u64 *vcpu_fpr = vcpu->arch.fpr;
+#ifdef CONFIG_VSX
        u64 *vcpu_vsx = vcpu->arch.vsr;
+#endif
        u64 *thread_fpr = (u64*)t->fpr;
        int i;
 
@@ -629,21 +774,67 @@ static void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
        kvmppc_recalc_shadow_msr(vcpu);
 }
 
+static int kvmppc_read_inst(struct kvm_vcpu *vcpu)
+{
+       ulong srr0 = kvmppc_get_pc(vcpu);
+       u32 last_inst = kvmppc_get_last_inst(vcpu);
+       int ret;
+
+       ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false);
+       if (ret == -ENOENT) {
+               ulong msr = vcpu->arch.shared->msr;
+
+               msr = kvmppc_set_field(msr, 33, 33, 1);
+               msr = kvmppc_set_field(msr, 34, 36, 0);
+               vcpu->arch.shared->msr = kvmppc_set_field(msr, 42, 47, 0);
+               kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE);
+               return EMULATE_AGAIN;
+       }
+
+       return EMULATE_DONE;
+}
+
+static int kvmppc_check_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr)
+{
+
+       /* Need to do paired single emulation? */
+       if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE))
+               return EMULATE_DONE;
+
+       /* Read out the instruction */
+       if (kvmppc_read_inst(vcpu) == EMULATE_DONE)
+               /* Need to emulate */
+               return EMULATE_FAIL;
+
+       return EMULATE_AGAIN;
+}
+
 /* Handle external providers (FPU, Altivec, VSX) */
 static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
                             ulong msr)
 {
        struct thread_struct *t = &current->thread;
        u64 *vcpu_fpr = vcpu->arch.fpr;
+#ifdef CONFIG_VSX
        u64 *vcpu_vsx = vcpu->arch.vsr;
+#endif
        u64 *thread_fpr = (u64*)t->fpr;
        int i;
 
-       if (!(vcpu->arch.msr & msr)) {
+       /* When we have paired singles, we emulate in software */
+       if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)
+               return RESUME_GUEST;
+
+       if (!(vcpu->arch.shared->msr & msr)) {
                kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
                return RESUME_GUEST;
        }
 
+       /* We already own the ext */
+       if (vcpu->arch.guest_owned_ext & msr) {
+               return RESUME_GUEST;
+       }
+
 #ifdef DEBUG_EXT
        printk(KERN_INFO "Loading up ext 0x%lx\n", msr);
 #endif
@@ -694,23 +885,27 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
        run->exit_reason = KVM_EXIT_UNKNOWN;
        run->ready_for_interrupt_injection = 1;
-#ifdef EXIT_DEBUG
-       printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | dec=0x%x | msr=0x%lx\n",
-               exit_nr, vcpu->arch.pc, vcpu->arch.fault_dear,
-               kvmppc_get_dec(vcpu), vcpu->arch.msr);
-#elif defined (EXIT_DEBUG_SIMPLE)
-       if ((exit_nr != 0x900) && (exit_nr != 0x500))
-               printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | msr=0x%lx\n",
-                       exit_nr, vcpu->arch.pc, vcpu->arch.fault_dear,
-                       vcpu->arch.msr);
-#endif
+
+       trace_kvm_book3s_exit(exit_nr, vcpu);
        kvm_resched(vcpu);
        switch (exit_nr) {
        case BOOK3S_INTERRUPT_INST_STORAGE:
                vcpu->stat.pf_instruc++;
+
+#ifdef CONFIG_PPC_BOOK3S_32
+               /* We set segments as unused segments when invalidating them. So
+                * treat the respective fault as segment fault. */
+               if (to_svcpu(vcpu)->sr[kvmppc_get_pc(vcpu) >> SID_SHIFT]
+                   == SR_INVALID) {
+                       kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu));
+                       r = RESUME_GUEST;
+                       break;
+               }
+#endif
+
                /* only care about PTEG not found errors, but leave NX alone */
-               if (vcpu->arch.shadow_srr1 & 0x40000000) {
-                       r = kvmppc_handle_pagefault(run, vcpu, vcpu->arch.pc, exit_nr);
+               if (to_svcpu(vcpu)->shadow_srr1 & 0x40000000) {
+                       r = kvmppc_handle_pagefault(run, vcpu, kvmppc_get_pc(vcpu), exit_nr);
                        vcpu->stat.sp_instruc++;
                } else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
                          (!(vcpu->arch.hflags & BOOK3S_HFLAG_DCBZ32))) {
@@ -719,37 +914,51 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
                         *     so we can't use the NX bit inside the guest. Let's cross our fingers,
                         *     that no guest that needs the dcbz hack does NX.
                         */
-                       kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL);
+                       kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL);
+                       r = RESUME_GUEST;
                } else {
-                       vcpu->arch.msr |= vcpu->arch.shadow_srr1 & 0x58000000;
+                       vcpu->arch.shared->msr |=
+                               to_svcpu(vcpu)->shadow_srr1 & 0x58000000;
                        kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
-                       kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL);
                        r = RESUME_GUEST;
                }
                break;
        case BOOK3S_INTERRUPT_DATA_STORAGE:
+       {
+               ulong dar = kvmppc_get_fault_dar(vcpu);
                vcpu->stat.pf_storage++;
+
+#ifdef CONFIG_PPC_BOOK3S_32
+               /* We set segments as unused segments when invalidating them. So
+                * treat the respective fault as segment fault. */
+               if ((to_svcpu(vcpu)->sr[dar >> SID_SHIFT]) == SR_INVALID) {
+                       kvmppc_mmu_map_segment(vcpu, dar);
+                       r = RESUME_GUEST;
+                       break;
+               }
+#endif
+
                /* The only case we need to handle is missing shadow PTEs */
-               if (vcpu->arch.fault_dsisr & DSISR_NOHPTE) {
-                       r = kvmppc_handle_pagefault(run, vcpu, vcpu->arch.fault_dear, exit_nr);
+               if (to_svcpu(vcpu)->fault_dsisr & DSISR_NOHPTE) {
+                       r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr);
                } else {
-                       vcpu->arch.dear = vcpu->arch.fault_dear;
-                       to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr;
+                       vcpu->arch.shared->dar = dar;
+                       vcpu->arch.shared->dsisr = to_svcpu(vcpu)->fault_dsisr;
                        kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
-                       kvmppc_mmu_pte_flush(vcpu, vcpu->arch.dear, ~0xFFFULL);
                        r = RESUME_GUEST;
                }
                break;
+       }
        case BOOK3S_INTERRUPT_DATA_SEGMENT:
-               if (kvmppc_mmu_map_segment(vcpu, vcpu->arch.fault_dear) < 0) {
-                       vcpu->arch.dear = vcpu->arch.fault_dear;
+               if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_fault_dar(vcpu)) < 0) {
+                       vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu);
                        kvmppc_book3s_queue_irqprio(vcpu,
                                BOOK3S_INTERRUPT_DATA_SEGMENT);
                }
                r = RESUME_GUEST;
                break;
        case BOOK3S_INTERRUPT_INST_SEGMENT:
-               if (kvmppc_mmu_map_segment(vcpu, vcpu->arch.pc) < 0) {
+               if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)) < 0) {
                        kvmppc_book3s_queue_irqprio(vcpu,
                                BOOK3S_INTERRUPT_INST_SEGMENT);
                }
@@ -764,18 +973,22 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
                vcpu->stat.ext_intr_exits++;
                r = RESUME_GUEST;
                break;
+       case BOOK3S_INTERRUPT_PERFMON:
+               r = RESUME_GUEST;
+               break;
        case BOOK3S_INTERRUPT_PROGRAM:
        {
                enum emulation_result er;
                ulong flags;
 
-               flags = vcpu->arch.shadow_srr1 & 0x1f0000ull;
+program_interrupt:
+               flags = to_svcpu(vcpu)->shadow_srr1 & 0x1f0000ull;
 
-               if (vcpu->arch.msr & MSR_PR) {
+               if (vcpu->arch.shared->msr & MSR_PR) {
 #ifdef EXIT_DEBUG
-                       printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", vcpu->arch.pc, vcpu->arch.last_inst);
+                       printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu));
 #endif
-                       if ((vcpu->arch.last_inst & 0xff0007ff) !=
+                       if ((kvmppc_get_last_inst(vcpu) & 0xff0007ff) !=
                            (INS_DCBZ & 0xfffffff7)) {
                                kvmppc_core_queue_program(vcpu, flags);
                                r = RESUME_GUEST;
@@ -794,31 +1007,80 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
                        break;
                case EMULATE_FAIL:
                        printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
-                              __func__, vcpu->arch.pc, vcpu->arch.last_inst);
+                              __func__, kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu));
                        kvmppc_core_queue_program(vcpu, flags);
                        r = RESUME_GUEST;
                        break;
+               case EMULATE_DO_MMIO:
+                       run->exit_reason = KVM_EXIT_MMIO;
+                       r = RESUME_HOST_NV;
+                       break;
                default:
                        BUG();
                }
                break;
        }
        case BOOK3S_INTERRUPT_SYSCALL:
-#ifdef EXIT_DEBUG
-               printk(KERN_INFO "Syscall Nr %d\n", (int)kvmppc_get_gpr(vcpu, 0));
-#endif
-               vcpu->stat.syscall_exits++;
-               kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
-               r = RESUME_GUEST;
+               if (vcpu->arch.osi_enabled &&
+                   (((u32)kvmppc_get_gpr(vcpu, 3)) == OSI_SC_MAGIC_R3) &&
+                   (((u32)kvmppc_get_gpr(vcpu, 4)) == OSI_SC_MAGIC_R4)) {
+                       /* MOL hypercalls */
+                       u64 *gprs = run->osi.gprs;
+                       int i;
+
+                       run->exit_reason = KVM_EXIT_OSI;
+                       for (i = 0; i < 32; i++)
+                               gprs[i] = kvmppc_get_gpr(vcpu, i);
+                       vcpu->arch.osi_needed = 1;
+                       r = RESUME_HOST_NV;
+               } else if (!(vcpu->arch.shared->msr & MSR_PR) &&
+                   (((u32)kvmppc_get_gpr(vcpu, 0)) == KVM_SC_MAGIC_R0)) {
+                       /* KVM PV hypercalls */
+                       kvmppc_set_gpr(vcpu, 3, kvmppc_kvm_pv(vcpu));
+                       r = RESUME_GUEST;
+               } else {
+                       /* Guest syscalls */
+                       vcpu->stat.syscall_exits++;
+                       kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+                       r = RESUME_GUEST;
+               }
                break;
        case BOOK3S_INTERRUPT_FP_UNAVAIL:
-               r = kvmppc_handle_ext(vcpu, exit_nr, MSR_FP);
-               break;
        case BOOK3S_INTERRUPT_ALTIVEC:
-               r = kvmppc_handle_ext(vcpu, exit_nr, MSR_VEC);
-               break;
        case BOOK3S_INTERRUPT_VSX:
-               r = kvmppc_handle_ext(vcpu, exit_nr, MSR_VSX);
+       {
+               int ext_msr = 0;
+
+               switch (exit_nr) {
+               case BOOK3S_INTERRUPT_FP_UNAVAIL: ext_msr = MSR_FP;  break;
+               case BOOK3S_INTERRUPT_ALTIVEC:    ext_msr = MSR_VEC; break;
+               case BOOK3S_INTERRUPT_VSX:        ext_msr = MSR_VSX; break;
+               }
+
+               switch (kvmppc_check_ext(vcpu, exit_nr)) {
+               case EMULATE_DONE:
+                       /* everything ok - let's enable the ext */
+                       r = kvmppc_handle_ext(vcpu, exit_nr, ext_msr);
+                       break;
+               case EMULATE_FAIL:
+                       /* we need to emulate this instruction */
+                       goto program_interrupt;
+                       break;
+               default:
+                       /* nothing to worry about - go again */
+                       break;
+               }
+               break;
+       }
+       case BOOK3S_INTERRUPT_ALIGNMENT:
+               if (kvmppc_read_inst(vcpu) == EMULATE_DONE) {
+                       vcpu->arch.shared->dsisr = kvmppc_alignment_dsisr(vcpu,
+                               kvmppc_get_last_inst(vcpu));
+                       vcpu->arch.shared->dar = kvmppc_alignment_dar(vcpu,
+                               kvmppc_get_last_inst(vcpu));
+                       kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+               }
+               r = RESUME_GUEST;
                break;
        case BOOK3S_INTERRUPT_MACHINE_CHECK:
        case BOOK3S_INTERRUPT_TRACE:
@@ -828,7 +1090,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
        default:
                /* Ugh - bork here! What did we get? */
                printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n",
-                       exit_nr, vcpu->arch.pc, vcpu->arch.shadow_srr1);
+                       exit_nr, kvmppc_get_pc(vcpu), to_svcpu(vcpu)->shadow_srr1);
                r = RESUME_HOST;
                BUG();
                break;
@@ -854,9 +1116,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
                }
        }
 
-#ifdef EXIT_DEBUG
-       printk(KERN_EMERG "KVM exit: vcpu=0x%p pc=0x%lx r=0x%x\n", vcpu, vcpu->arch.pc, r);
-#endif
+       trace_kvm_book3s_reenter(r, vcpu);
 
        return r;
 }
@@ -870,22 +1130,23 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
        int i;
 
-       regs->pc = vcpu->arch.pc;
+       regs->pc = kvmppc_get_pc(vcpu);
        regs->cr = kvmppc_get_cr(vcpu);
-       regs->ctr = vcpu->arch.ctr;
-       regs->lr = vcpu->arch.lr;
+       regs->ctr = kvmppc_get_ctr(vcpu);
+       regs->lr = kvmppc_get_lr(vcpu);
        regs->xer = kvmppc_get_xer(vcpu);
-       regs->msr = vcpu->arch.msr;
-       regs->srr0 = vcpu->arch.srr0;
-       regs->srr1 = vcpu->arch.srr1;
+       regs->msr = vcpu->arch.shared->msr;
+       regs->srr0 = vcpu->arch.shared->srr0;
+       regs->srr1 = vcpu->arch.shared->srr1;
        regs->pid = vcpu->arch.pid;
-       regs->sprg0 = vcpu->arch.sprg0;
-       regs->sprg1 = vcpu->arch.sprg1;
-       regs->sprg2 = vcpu->arch.sprg2;
-       regs->sprg3 = vcpu->arch.sprg3;
-       regs->sprg5 = vcpu->arch.sprg4;
-       regs->sprg6 = vcpu->arch.sprg5;
-       regs->sprg7 = vcpu->arch.sprg6;
+       regs->sprg0 = vcpu->arch.shared->sprg0;
+       regs->sprg1 = vcpu->arch.shared->sprg1;
+       regs->sprg2 = vcpu->arch.shared->sprg2;
+       regs->sprg3 = vcpu->arch.shared->sprg3;
+       regs->sprg4 = vcpu->arch.sprg4;
+       regs->sprg5 = vcpu->arch.sprg5;
+       regs->sprg6 = vcpu->arch.sprg6;
+       regs->sprg7 = vcpu->arch.sprg7;
 
        for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
                regs->gpr[i] = kvmppc_get_gpr(vcpu, i);
@@ -897,21 +1158,22 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
        int i;
 
-       vcpu->arch.pc = regs->pc;
+       kvmppc_set_pc(vcpu, regs->pc);
        kvmppc_set_cr(vcpu, regs->cr);
-       vcpu->arch.ctr = regs->ctr;
-       vcpu->arch.lr = regs->lr;
+       kvmppc_set_ctr(vcpu, regs->ctr);
+       kvmppc_set_lr(vcpu, regs->lr);
        kvmppc_set_xer(vcpu, regs->xer);
        kvmppc_set_msr(vcpu, regs->msr);
-       vcpu->arch.srr0 = regs->srr0;
-       vcpu->arch.srr1 = regs->srr1;
-       vcpu->arch.sprg0 = regs->sprg0;
-       vcpu->arch.sprg1 = regs->sprg1;
-       vcpu->arch.sprg2 = regs->sprg2;
-       vcpu->arch.sprg3 = regs->sprg3;
-       vcpu->arch.sprg5 = regs->sprg4;
-       vcpu->arch.sprg6 = regs->sprg5;
-       vcpu->arch.sprg7 = regs->sprg6;
+       vcpu->arch.shared->srr0 = regs->srr0;
+       vcpu->arch.shared->srr1 = regs->srr1;
+       vcpu->arch.shared->sprg0 = regs->sprg0;
+       vcpu->arch.shared->sprg1 = regs->sprg1;
+       vcpu->arch.shared->sprg2 = regs->sprg2;
+       vcpu->arch.shared->sprg3 = regs->sprg3;
+       vcpu->arch.sprg4 = regs->sprg4;
+       vcpu->arch.sprg5 = regs->sprg5;
+       vcpu->arch.sprg6 = regs->sprg6;
+       vcpu->arch.sprg7 = regs->sprg7;
 
        for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
                kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
@@ -930,19 +1192,19 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
        sregs->u.s.sdr1 = to_book3s(vcpu)->sdr1;
        if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) {
                for (i = 0; i < 64; i++) {
-                       sregs->u.s.ppc64.slb[i].slbe = vcpu3s->slb[i].orige | i;
-                       sregs->u.s.ppc64.slb[i].slbv = vcpu3s->slb[i].origv;
+                       sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige | i;
+                       sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv;
                }
        } else {
-               for (i = 0; i < 16; i++) {
-                       sregs->u.s.ppc32.sr[i] = vcpu3s->sr[i].raw;
-                       sregs->u.s.ppc32.sr[i] = vcpu3s->sr[i].raw;
-               }
+               for (i = 0; i < 16; i++)
+                       sregs->u.s.ppc32.sr[i] = vcpu->arch.shared->sr[i];
+
                for (i = 0; i < 8; i++) {
                        sregs->u.s.ppc32.ibat[i] = vcpu3s->ibat[i].raw;
                        sregs->u.s.ppc32.dbat[i] = vcpu3s->dbat[i].raw;
                }
        }
+
        return 0;
 }
 
@@ -978,6 +1240,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 
        /* Flush the MMU after messing with the segments */
        kvmppc_mmu_pte_flush(vcpu, 0, 0);
+
        return 0;
 }
 
@@ -1045,48 +1308,65 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 {
        struct kvmppc_vcpu_book3s *vcpu_book3s;
        struct kvm_vcpu *vcpu;
-       int err;
+       int err = -ENOMEM;
+       unsigned long p;
 
-       vcpu_book3s = (struct kvmppc_vcpu_book3s *)__get_free_pages( GFP_KERNEL | __GFP_ZERO,
-                       get_order(sizeof(struct kvmppc_vcpu_book3s)));
-       if (!vcpu_book3s) {
-               err = -ENOMEM;
+       vcpu_book3s = vzalloc(sizeof(struct kvmppc_vcpu_book3s));
+       if (!vcpu_book3s)
                goto out;
-       }
+
+       vcpu_book3s->shadow_vcpu = (struct kvmppc_book3s_shadow_vcpu *)
+               kzalloc(sizeof(*vcpu_book3s->shadow_vcpu), GFP_KERNEL);
+       if (!vcpu_book3s->shadow_vcpu)
+               goto free_vcpu;
 
        vcpu = &vcpu_book3s->vcpu;
        err = kvm_vcpu_init(vcpu, kvm, id);
        if (err)
-               goto free_vcpu;
+               goto free_shadow_vcpu;
+
+       p = __get_free_page(GFP_KERNEL|__GFP_ZERO);
+       if (!p)
+               goto uninit_vcpu;
+       /* the real shared page fills the last 4k of our page */
+       vcpu->arch.shared = (void*)(p + PAGE_SIZE - 4096);
 
        vcpu->arch.host_retip = kvm_return_point;
        vcpu->arch.host_msr = mfmsr();
+#ifdef CONFIG_PPC_BOOK3S_64
        /* default to book3s_64 (970fx) */
        vcpu->arch.pvr = 0x3C0301;
+#else
+       /* default to book3s_32 (750) */
+       vcpu->arch.pvr = 0x84202;
+#endif
        kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
-       vcpu_book3s->slb_nr = 64;
+       vcpu->arch.slb_nr = 64;
 
        /* remember where some real-mode handlers are */
-       vcpu->arch.trampoline_lowmem = kvmppc_trampoline_lowmem;
-       vcpu->arch.trampoline_enter = kvmppc_trampoline_enter;
+       vcpu->arch.trampoline_lowmem = __pa(kvmppc_handler_lowmem_trampoline);
+       vcpu->arch.trampoline_enter = __pa(kvmppc_handler_trampoline_enter);
        vcpu->arch.highmem_handler = (ulong)kvmppc_handler_highmem;
+#ifdef CONFIG_PPC_BOOK3S_64
        vcpu->arch.rmcall = *(ulong*)kvmppc_rmcall;
+#else
+       vcpu->arch.rmcall = (ulong)kvmppc_rmcall;
+#endif
 
        vcpu->arch.shadow_msr = MSR_USER64;
 
-       err = __init_new_context();
+       err = kvmppc_mmu_init(vcpu);
        if (err < 0)
-               goto free_vcpu;
-       vcpu_book3s->context_id = err;
-
-       vcpu_book3s->vsid_max = ((vcpu_book3s->context_id + 1) << USER_ESID_BITS) - 1;
-       vcpu_book3s->vsid_first = vcpu_book3s->context_id << USER_ESID_BITS;
-       vcpu_book3s->vsid_next = vcpu_book3s->vsid_first;
+               goto uninit_vcpu;
 
        return vcpu;
 
+uninit_vcpu:
+       kvm_vcpu_uninit(vcpu);
+free_shadow_vcpu:
+       kfree(vcpu_book3s->shadow_vcpu);
 free_vcpu:
-       free_pages((long)vcpu_book3s, get_order(sizeof(struct kvmppc_vcpu_book3s)));
+       vfree(vcpu_book3s);
 out:
        return ERR_PTR(err);
 }
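
A note on the shared-page placement above: __get_free_page() returns one host page, and vcpu->arch.shared is pointed at its last 4k, presumably so that on 64k-page hosts the guest-visible magic page is still one complete, 4k-aligned region (on 4k-page hosts the two coincide). kvmppc_core_vcpu_free() below masks the pointer with PAGE_MASK to recover the original allocation before free_page().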
@@ -1095,18 +1375,28 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
 {
        struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
 
-       __destroy_context(vcpu_book3s->context_id);
+       free_page((unsigned long)vcpu->arch.shared & PAGE_MASK);
        kvm_vcpu_uninit(vcpu);
-       free_pages((long)vcpu_book3s, get_order(sizeof(struct kvmppc_vcpu_book3s)));
+       kfree(vcpu_book3s->shadow_vcpu);
+       vfree(vcpu_book3s);
 }
 
 extern int __kvmppc_vcpu_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
 int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 {
        int ret;
-       struct thread_struct ext_bkp;
-       bool save_vec = current->thread.used_vr;
-       bool save_vsx = current->thread.used_vsr;
+       double fpr[32][TS_FPRWIDTH];
+       unsigned int fpscr;
+       int fpexc_mode;
+#ifdef CONFIG_ALTIVEC
+       vector128 vr[32];
+       vector128 vscr;
+       unsigned long uninitialized_var(vrsave);
+       int used_vr;
+#endif
+#ifdef CONFIG_VSX
+       int used_vsr;
+#endif
        ulong ext_msr;
 
        /* No need to go into the guest when all we do is going out */
@@ -1118,27 +1408,27 @@ int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
        /* Save FPU state in stack */
        if (current->thread.regs->msr & MSR_FP)
                giveup_fpu(current);
-       memcpy(ext_bkp.fpr, current->thread.fpr, sizeof(current->thread.fpr));
-       ext_bkp.fpscr = current->thread.fpscr;
-       ext_bkp.fpexc_mode = current->thread.fpexc_mode;
+       memcpy(fpr, current->thread.fpr, sizeof(current->thread.fpr));
+       fpscr = current->thread.fpscr.val;
+       fpexc_mode = current->thread.fpexc_mode;
 
 #ifdef CONFIG_ALTIVEC
        /* Save Altivec state in stack */
-       if (save_vec) {
+       used_vr = current->thread.used_vr;
+       if (used_vr) {
                if (current->thread.regs->msr & MSR_VEC)
                        giveup_altivec(current);
-               memcpy(ext_bkp.vr, current->thread.vr, sizeof(ext_bkp.vr));
-               ext_bkp.vscr = current->thread.vscr;
-               ext_bkp.vrsave = current->thread.vrsave;
+               memcpy(vr, current->thread.vr, sizeof(current->thread.vr));
+               vscr = current->thread.vscr;
+               vrsave = current->thread.vrsave;
        }
-       ext_bkp.used_vr = current->thread.used_vr;
 #endif
 
 #ifdef CONFIG_VSX
        /* Save VSX state in stack */
-       if (save_vsx && (current->thread.regs->msr & MSR_VSX))
+       used_vsr = current->thread.used_vsr;
+       if (used_vsr && (current->thread.regs->msr & MSR_VSX))
                        __giveup_vsx(current);
-       ext_bkp.used_vsr = current->thread.used_vsr;
 #endif
 
        /* Remember the MSR with disabled extensions */
@@ -1147,6 +1437,10 @@ int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
        /* XXX we get called with irq disabled - change that! */
        local_irq_enable();
 
+       /* Preload FPU if it's enabled */
+       if (vcpu->arch.shared->msr & MSR_FP)
+               kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
+
        ret = __kvmppc_vcpu_entry(kvm_run, vcpu);
 
        local_irq_disable();
@@ -1159,22 +1453,22 @@ int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
        kvmppc_giveup_ext(vcpu, MSR_VSX);
 
        /* Restore FPU state from stack */
-       memcpy(current->thread.fpr, ext_bkp.fpr, sizeof(ext_bkp.fpr));
-       current->thread.fpscr = ext_bkp.fpscr;
-       current->thread.fpexc_mode = ext_bkp.fpexc_mode;
+       memcpy(current->thread.fpr, fpr, sizeof(current->thread.fpr));
+       current->thread.fpscr.val = fpscr;
+       current->thread.fpexc_mode = fpexc_mode;
 
 #ifdef CONFIG_ALTIVEC
        /* Restore Altivec state from stack */
-       if (save_vec && current->thread.used_vr) {
-               memcpy(current->thread.vr, ext_bkp.vr, sizeof(ext_bkp.vr));
-               current->thread.vscr = ext_bkp.vscr;
-               current->thread.vrsave= ext_bkp.vrsave;
+       if (used_vr && current->thread.used_vr) {
+               memcpy(current->thread.vr, vr, sizeof(current->thread.vr));
+               current->thread.vscr = vscr;
+               current->thread.vrsave = vrsave;
        }
-       current->thread.used_vr = ext_bkp.used_vr;
+       current->thread.used_vr = used_vr;
 #endif
 
 #ifdef CONFIG_VSX
-       current->thread.used_vsr = ext_bkp.used_vsr;
+       current->thread.used_vsr = used_vsr;
 #endif
 
        return ret;
@@ -1182,11 +1476,22 @@ int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 
 static int kvmppc_book3s_init(void)
 {
-       return kvm_init(NULL, sizeof(struct kvmppc_vcpu_book3s), THIS_MODULE);
+       int r;
+
+       r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_book3s), 0,
+                    THIS_MODULE);
+
+       if (r)
+               return r;
+
+       r = kvmppc_mmu_hpte_sysinit();
+
+       return r;
 }
 
 static void kvmppc_book3s_exit(void)
 {
+       kvmppc_mmu_hpte_sysexit();
        kvm_exit();
 }