Merge branch 'kvm-updates/2.6.34' of git://git.kernel.org/pub/scm/virt/kvm/kvm
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 5 Mar 2010 21:12:34 +0000 (13:12 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 5 Mar 2010 21:12:34 +0000 (13:12 -0800)
* 'kvm-updates/2.6.34' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (145 commits)
  KVM: x86: Add KVM_CAP_X86_ROBUST_SINGLESTEP
  KVM: VMX: Update instruction length on intercepted BP
  KVM: Fix emulate_sys[call, enter, exit]()'s fault handling
  KVM: Fix segment descriptor loading
  KVM: Fix load_guest_segment_descriptor() to inject page fault
  KVM: x86 emulator: Forbid modifying CS segment register by mov instruction
  KVM: Convert kvm->requests_lock to raw_spinlock_t
  KVM: Convert i8254/i8259 locks to raw_spinlocks
  KVM: x86 emulator: disallow opcode 82 in 64-bit mode
  KVM: x86 emulator: code style cleanup
  KVM: Plan obsolescence of kernel allocated slots, paravirt mmu
  KVM: x86 emulator: Add LOCK prefix validity checking
  KVM: x86 emulator: Check CPL level during privilege instruction emulation
  KVM: x86 emulator: Fix popf emulation
  KVM: x86 emulator: Check IOPL level during io instruction emulation
  KVM: x86 emulator: fix memory access during x86 emulation
  KVM: x86 emulator: Add Virtual-8086 mode of emulation
  KVM: x86 emulator: Add group9 instruction decoding
  KVM: x86 emulator: Add group8 instruction decoding
  KVM: do not store wqh in irqfd
  ...

Trivial conflicts in Documentation/feature-removal-schedule.txt

74 files changed:
Documentation/feature-removal-schedule.txt
Documentation/kvm/api.txt
MAINTAINERS
arch/ia64/kvm/Kconfig
arch/ia64/kvm/kvm-ia64.c
arch/ia64/kvm/kvm_fw.c
arch/ia64/kvm/mmio.c
arch/ia64/kvm/vcpu.c
arch/powerpc/include/asm/kvm_asm.h
arch/powerpc/include/asm/kvm_book3s.h
arch/powerpc/include/asm/kvm_book3s_64_asm.h
arch/powerpc/include/asm/kvm_e500.h
arch/powerpc/include/asm/kvm_host.h
arch/powerpc/include/asm/kvm_ppc.h
arch/powerpc/include/asm/paca.h
arch/powerpc/include/asm/reg.h
arch/powerpc/kernel/asm-offsets.c
arch/powerpc/kernel/ppc_ksyms.c
arch/powerpc/kvm/44x_emulate.c
arch/powerpc/kvm/44x_tlb.c
arch/powerpc/kvm/Kconfig
arch/powerpc/kvm/book3s.c
arch/powerpc/kvm/book3s_64_emulate.c
arch/powerpc/kvm/book3s_64_exports.c
arch/powerpc/kvm/book3s_64_interrupts.S
arch/powerpc/kvm/book3s_64_mmu.c
arch/powerpc/kvm/book3s_64_rmhandlers.S
arch/powerpc/kvm/book3s_64_slb.S
arch/powerpc/kvm/booke.c
arch/powerpc/kvm/booke_emulate.c
arch/powerpc/kvm/e500.c
arch/powerpc/kvm/e500_emulate.c
arch/powerpc/kvm/e500_tlb.c
arch/powerpc/kvm/emulate.c
arch/powerpc/kvm/powerpc.c
arch/s390/kvm/kvm-s390.c
arch/s390/kvm/kvm-s390.h
arch/x86/include/asm/Kbuild
arch/x86/include/asm/hyperv.h [new file with mode: 0644]
arch/x86/include/asm/kvm_emulate.h
arch/x86/include/asm/kvm_host.h
arch/x86/include/asm/kvm_para.h
arch/x86/include/asm/svm.h
arch/x86/include/asm/vmx.h
arch/x86/kernel/vsyscall_64.c
arch/x86/kvm/Kconfig
arch/x86/kvm/emulate.c
arch/x86/kvm/i8254.c
arch/x86/kvm/i8254.h
arch/x86/kvm/i8259.c
arch/x86/kvm/irq.h
arch/x86/kvm/kvm_cache_regs.h
arch/x86/kvm/lapic.c
arch/x86/kvm/lapic.h
arch/x86/kvm/mmu.c
arch/x86/kvm/mmu.h
arch/x86/kvm/paging_tmpl.h
arch/x86/kvm/svm.c
arch/x86/kvm/trace.h
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c
arch/x86/kvm/x86.h
include/linux/kvm.h
include/linux/kvm_host.h
include/trace/events/kvm.h
virt/kvm/Kconfig
virt/kvm/assigned-dev.c
virt/kvm/coalesced_mmio.c
virt/kvm/coalesced_mmio.h
virt/kvm/eventfd.c
virt/kvm/ioapic.c
virt/kvm/ioapic.h
virt/kvm/iommu.c
virt/kvm/kvm_main.c

index 03497909539e7859d460b131c46c6e853af5e48a..31575e220f3ba9da40526301ff502d85850768ff 100644 (file)
@@ -556,3 +556,35 @@ Why:       udev fully replaces this special file system that only contains CAPI
        NCCI TTY device nodes. User space (pppdcapiplugin) works without
        noticing the difference.
 Who:   Jan Kiszka <jan.kiszka@web.de>
+
+----------------------------
+
+What:  KVM memory aliases support
+When:  July 2010
+Why:   Memory aliasing support is used for speeding up guest vga access
+       through the vga windows.
+
+       Modern userspace no longer uses this feature, so it's just bitrotted
+       code and can be removed with no impact.
+Who:   Avi Kivity <avi@redhat.com>
+
+----------------------------
+
+What:  KVM kernel-allocated memory slots
+When:  July 2010
+Why:   Since 2.6.25, kvm supports user-allocated memory slots, which are
+       much more flexible than kernel-allocated slots.  All current userspace
+       supports the newer interface and this code can be removed with no
+       impact.
+Who:   Avi Kivity <avi@redhat.com>
+
+----------------------------
+
+What:  KVM paravirt mmu host support
+When:  January 2011
+Why:   The paravirt mmu host support is slower than non-paravirt mmu, both
+       on newer and older hardware.  It is already not exposed to the guest,
+       and kept only for live migration purposes.
+Who:   Avi Kivity <avi@redhat.com>
+
+----------------------------
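
The kernel-allocated slot and memory alias removal entries above assume that all current userspace already registers guest memory with KVM_SET_USER_MEMORY_REGION. A minimal sketch of that user-allocated slot interface follows; the slot number, size and addresses are illustrative only and are not taken from this merge:

/* Hedged sketch: register one user-allocated memory slot on a VM fd
 * obtained from KVM_CREATE_VM; slot/size/addresses are illustrative. */
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/kvm.h>

static int add_user_memslot(int vm_fd)
{
        size_t size = 0x100000;         /* 1 MiB of guest RAM */
        void *host_mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
                              MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        struct kvm_userspace_memory_region region = {
                .slot            = 0,
                .flags           = 0,
                .guest_phys_addr = 0,
                .memory_size     = size,
                .userspace_addr  = (unsigned long)host_mem,
        };

        if (host_mem == MAP_FAILED)
                return -1;

        return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
}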
index 2811e452f7566f5f5fe4fa6242f8724a9970ba11..c6416a398163155bc64341754b99039aba529502 100644 (file)
@@ -23,12 +23,12 @@ of a virtual machine.  The ioctls belong to three classes
    Only run vcpu ioctls from the same thread that was used to create the
    vcpu.
 
-2. File descritpors
+2. File descriptors
 
 The kvm API is centered around file descriptors.  An initial
 open("/dev/kvm") obtains a handle to the kvm subsystem; this handle
 can be used to issue system ioctls.  A KVM_CREATE_VM ioctl on this
-handle will create a VM file descripror which can be used to issue VM
+handle will create a VM file descriptor which can be used to issue VM
 ioctls.  A KVM_CREATE_VCPU ioctl on a VM fd will create a virtual cpu
 and return a file descriptor pointing to it.  Finally, ioctls on a vcpu
 fd can be used to control the vcpu, including the important task of
@@ -643,7 +643,7 @@ Type: vm ioctl
 Parameters: struct kvm_clock_data (in)
 Returns: 0 on success, -1 on error
 
-Sets the current timestamp of kvmclock to the valued specific in its parameter.
+Sets the current timestamp of kvmclock to the value specified in its parameter.
 In conjunction with KVM_GET_CLOCK, it is used to ensure monotonicity on scenarios
 such as migration.
 
@@ -795,11 +795,11 @@ Unused.
                        __u64 data_offset; /* relative to kvm_run start */
                } io;
 
-If exit_reason is KVM_EXIT_IO_IN or KVM_EXIT_IO_OUT, then the vcpu has
+If exit_reason is KVM_EXIT_IO, then the vcpu has
 executed a port I/O instruction which could not be satisfied by kvm.
 data_offset describes where the data is located (KVM_EXIT_IO_OUT) or
 where kvm expects application code to place the data for the next
-KVM_RUN invocation (KVM_EXIT_IO_IN).  Data format is a patcked array.
+KVM_RUN invocation (KVM_EXIT_IO_IN).  Data format is a packed array.
 
                struct {
                        struct kvm_debug_exit_arch arch;
@@ -815,7 +815,7 @@ Unused.
                        __u8  is_write;
                } mmio;
 
-If exit_reason is KVM_EXIT_MMIO or KVM_EXIT_IO_OUT, then the vcpu has
+If exit_reason is KVM_EXIT_MMIO, then the vcpu has
 executed a memory-mapped I/O instruction which could not be satisfied
 by kvm.  The 'data' member contains the written data if 'is_write' is
 true, and should be filled by application code otherwise.
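
The corrected exit_reason wording above matches how a userspace run loop dispatches on the mapped kvm_run structure. A minimal sketch, assuming a vcpu fd and its kvm_run mapping; handle_mmio_read() and handle_mmio_write() are hypothetical helpers, not part of the KVM API:

/* Hedged run-loop sketch; the mmio helpers are hypothetical. */
#include <sys/ioctl.h>
#include <linux/kvm.h>

void handle_mmio_write(__u64 addr, __u8 *data, __u32 len); /* hypothetical */
void handle_mmio_read(__u64 addr, __u8 *data, __u32 len);  /* hypothetical */

void vcpu_loop(int vcpu_fd, struct kvm_run *run)
{
        for (;;) {
                if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
                        return;

                switch (run->exit_reason) {
                case KVM_EXIT_IO:
                        /* packed data lives at (char *)run + run->io.data_offset */
                        break;
                case KVM_EXIT_MMIO:
                        if (run->mmio.is_write)
                                handle_mmio_write(run->mmio.phys_addr,
                                                  run->mmio.data, run->mmio.len);
                        else
                                handle_mmio_read(run->mmio.phys_addr,
                                                 run->mmio.data, run->mmio.len);
                        break;
                default:
                        return;
                }
        }
}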
index c6591bca646b3599703a3ca0ab0727ae2e53165b..51d8b5221dd8ccbfa208b2dec122c4b16835c20e 100644 (file)
@@ -3173,7 +3173,7 @@ F:        arch/x86/include/asm/svm.h
 F:     arch/x86/kvm/svm.c
 
 KERNEL VIRTUAL MACHINE (KVM) FOR POWERPC
-M:     Hollis Blanchard <hollisb@us.ibm.com>
+M:     Alexander Graf <agraf@suse.de>
 L:     kvm-ppc@vger.kernel.org
 W:     http://kvm.qumranet.com
 S:     Supported
index 01c75797119c568bd4d16919d3a122354a8b1a9c..fa4d1e59deb05c21281011cc07fe5fecfd8a3fb4 100644 (file)
@@ -26,6 +26,7 @@ config KVM
        select ANON_INODES
        select HAVE_KVM_IRQCHIP
        select KVM_APIC_ARCHITECTURE
+       select KVM_MMIO
        ---help---
          Support hosting fully virtualized guest machines using hardware
          virtualization extensions.  You will need a fairly recent
index 5fdeec5fddcf60db5d6a96dd5f5d86e533f2b0f1..26e0e089bfe76b0b89772bfc7f5617abc02d2a9d 100644 (file)
@@ -241,10 +241,10 @@ static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        return 0;
 mmio:
        if (p->dir)
-               r = kvm_io_bus_read(&vcpu->kvm->mmio_bus, p->addr,
+               r = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, p->addr,
                                    p->size, &p->data);
        else
-               r = kvm_io_bus_write(&vcpu->kvm->mmio_bus, p->addr,
+               r = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, p->addr,
                                     p->size, &p->data);
        if (r)
                printk(KERN_ERR"kvm: No iodevice found! addr:%lx\n", p->addr);
@@ -636,12 +636,9 @@ static void kvm_vcpu_post_transition(struct kvm_vcpu *vcpu)
 static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
        union context *host_ctx, *guest_ctx;
-       int r;
+       int r, idx;
 
-       /*
-        * down_read() may sleep and return with interrupts enabled
-        */
-       down_read(&vcpu->kvm->slots_lock);
+       idx = srcu_read_lock(&vcpu->kvm->srcu);
 
 again:
        if (signal_pending(current)) {
@@ -663,7 +660,7 @@ again:
        if (r < 0)
                goto vcpu_run_fail;
 
-       up_read(&vcpu->kvm->slots_lock);
+       srcu_read_unlock(&vcpu->kvm->srcu, idx);
        kvm_guest_enter();
 
        /*
@@ -687,7 +684,7 @@ again:
        kvm_guest_exit();
        preempt_enable();
 
-       down_read(&vcpu->kvm->slots_lock);
+       idx = srcu_read_lock(&vcpu->kvm->srcu);
 
        r = kvm_handle_exit(kvm_run, vcpu);
 
@@ -697,10 +694,10 @@ again:
        }
 
 out:
-       up_read(&vcpu->kvm->slots_lock);
+       srcu_read_unlock(&vcpu->kvm->srcu, idx);
        if (r > 0) {
                kvm_resched(vcpu);
-               down_read(&vcpu->kvm->slots_lock);
+               idx = srcu_read_lock(&vcpu->kvm->srcu);
                goto again;
        }
 
@@ -971,7 +968,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
                        goto out;
                r = kvm_setup_default_irq_routing(kvm);
                if (r) {
-                       kfree(kvm->arch.vioapic);
+                       kvm_ioapic_destroy(kvm);
                        goto out;
                }
                break;
@@ -1377,12 +1374,14 @@ static void free_kvm(struct kvm *kvm)
 
 static void kvm_release_vm_pages(struct kvm *kvm)
 {
+       struct kvm_memslots *slots;
        struct kvm_memory_slot *memslot;
        int i, j;
        unsigned long base_gfn;
 
-       for (i = 0; i < kvm->nmemslots; i++) {
-               memslot = &kvm->memslots[i];
+       slots = rcu_dereference(kvm->memslots);
+       for (i = 0; i < slots->nmemslots; i++) {
+               memslot = &slots->memslots[i];
                base_gfn = memslot->base_gfn;
 
                for (j = 0; j < memslot->npages; j++) {
@@ -1405,6 +1404,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
        kfree(kvm->arch.vioapic);
        kvm_release_vm_pages(kvm);
        kvm_free_physmem(kvm);
+       cleanup_srcu_struct(&kvm->srcu);
        free_kvm(kvm);
 }
 
@@ -1576,15 +1576,15 @@ out:
        return r;
 }
 
-int kvm_arch_set_memory_region(struct kvm *kvm,
-               struct kvm_userspace_memory_region *mem,
+int kvm_arch_prepare_memory_region(struct kvm *kvm,
+               struct kvm_memory_slot *memslot,
                struct kvm_memory_slot old,
+               struct kvm_userspace_memory_region *mem,
                int user_alloc)
 {
        unsigned long i;
        unsigned long pfn;
-       int npages = mem->memory_size >> PAGE_SHIFT;
-       struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot];
+       int npages = memslot->npages;
        unsigned long base_gfn = memslot->base_gfn;
 
        if (base_gfn + npages > (KVM_MAX_MEM_SIZE >> PAGE_SHIFT))
@@ -1608,6 +1608,14 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
        return 0;
 }
 
+void kvm_arch_commit_memory_region(struct kvm *kvm,
+               struct kvm_userspace_memory_region *mem,
+               struct kvm_memory_slot old,
+               int user_alloc)
+{
+       return;
+}
+
 void kvm_arch_flush_shadow(struct kvm *kvm)
 {
        kvm_flush_remote_tlbs(kvm);
@@ -1802,7 +1810,7 @@ static int kvm_ia64_sync_dirty_log(struct kvm *kvm,
        if (log->slot >= KVM_MEMORY_SLOTS)
                goto out;
 
-       memslot = &kvm->memslots[log->slot];
+       memslot = &kvm->memslots->memslots[log->slot];
        r = -ENOENT;
        if (!memslot->dirty_bitmap)
                goto out;
@@ -1827,6 +1835,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
        struct kvm_memory_slot *memslot;
        int is_dirty = 0;
 
+       mutex_lock(&kvm->slots_lock);
        spin_lock(&kvm->arch.dirty_log_lock);
 
        r = kvm_ia64_sync_dirty_log(kvm, log);
@@ -1840,12 +1849,13 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
        /* If nothing is dirty, don't bother messing with page tables. */
        if (is_dirty) {
                kvm_flush_remote_tlbs(kvm);
-               memslot = &kvm->memslots[log->slot];
+               memslot = &kvm->memslots->memslots[log->slot];
                n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
                memset(memslot->dirty_bitmap, 0, n);
        }
        r = 0;
 out:
+       mutex_unlock(&kvm->slots_lock);
        spin_unlock(&kvm->arch.dirty_log_lock);
        return r;
 }
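
The slots_lock to SRCU conversion above follows the standard SRCU read-side pattern; the generic shape, with names taken from the diff rather than new code, is:

/* Generic SRCU read-side shape used by the conversion above. */
int idx;

idx = srcu_read_lock(&vcpu->kvm->srcu);
/* ... access kvm->memslots under SRCU protection ... */
srcu_read_unlock(&vcpu->kvm->srcu, idx);

Writers publish a new memslots array and wait for an SRCU grace period before freeing the old one, which is why the read side can remain sleepable and lock-free.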
index e4b82319881db227044550b3330f1eea69770f50..cb548ee9fcaed4ce24c1f3c0994d397944afbe9d 100644 (file)
@@ -75,7 +75,7 @@ static void set_pal_result(struct kvm_vcpu *vcpu,
        struct exit_ctl_data *p;
 
        p = kvm_get_exit_data(vcpu);
-       if (p && p->exit_reason == EXIT_REASON_PAL_CALL) {
+       if (p->exit_reason == EXIT_REASON_PAL_CALL) {
                p->u.pal_data.ret = result;
                return ;
        }
@@ -87,7 +87,7 @@ static void set_sal_result(struct kvm_vcpu *vcpu,
        struct exit_ctl_data *p;
 
        p = kvm_get_exit_data(vcpu);
-       if (p && p->exit_reason == EXIT_REASON_SAL_CALL) {
+       if (p->exit_reason == EXIT_REASON_SAL_CALL) {
                p->u.sal_data.ret = result;
                return ;
        }
@@ -322,7 +322,7 @@ static  u64 kvm_get_pal_call_index(struct kvm_vcpu *vcpu)
        struct exit_ctl_data *p;
 
        p = kvm_get_exit_data(vcpu);
-       if (p && (p->exit_reason == EXIT_REASON_PAL_CALL))
+       if (p->exit_reason == EXIT_REASON_PAL_CALL)
                index = p->u.pal_data.gr28;
 
        return index;
@@ -646,18 +646,16 @@ static void kvm_get_sal_call_data(struct kvm_vcpu *vcpu, u64 *in0, u64 *in1,
 
        p = kvm_get_exit_data(vcpu);
 
-       if (p) {
-               if (p->exit_reason == EXIT_REASON_SAL_CALL) {
-                       *in0 = p->u.sal_data.in0;
-                       *in1 = p->u.sal_data.in1;
-                       *in2 = p->u.sal_data.in2;
-                       *in3 = p->u.sal_data.in3;
-                       *in4 = p->u.sal_data.in4;
-                       *in5 = p->u.sal_data.in5;
-                       *in6 = p->u.sal_data.in6;
-                       *in7 = p->u.sal_data.in7;
-                       return ;
-               }
+       if (p->exit_reason == EXIT_REASON_SAL_CALL) {
+               *in0 = p->u.sal_data.in0;
+               *in1 = p->u.sal_data.in1;
+               *in2 = p->u.sal_data.in2;
+               *in3 = p->u.sal_data.in3;
+               *in4 = p->u.sal_data.in4;
+               *in5 = p->u.sal_data.in5;
+               *in6 = p->u.sal_data.in6;
+               *in7 = p->u.sal_data.in7;
+               return ;
        }
        *in0 = 0;
 }
index 9bf55afd08d0603902ae63877499a91488a6d221..fb8f9f59a1eddea3bf967f5475582f5783980f9e 100644 (file)
@@ -316,8 +316,8 @@ void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma)
                return;
        } else {
                inst_type = -1;
-               panic_vm(vcpu, "Unsupported MMIO access instruction! \
-                               Bunld[0]=0x%lx, Bundle[1]=0x%lx\n",
+               panic_vm(vcpu, "Unsupported MMIO access instruction! "
+                               "Bunld[0]=0x%lx, Bundle[1]=0x%lx\n",
                                bundle.i64[0], bundle.i64[1]);
        }
 
index dce75b70cdd5d9abce6b28d419b5aba6ba97cb0d..958815c9787d2997a64ba4636084e0676fdc30ea 100644 (file)
@@ -1639,8 +1639,8 @@ void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val)
         * Otherwise panic
         */
        if (val & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM))
-               panic_vm(vcpu, "Only support guests with vpsr.pk =0 \
-                               & vpsr.is=0\n");
+               panic_vm(vcpu, "Only support guests with vpsr.pk =0 "
+                               "& vpsr.is=0\n");
 
        /*
         * For those IA64_PSR bits: id/da/dd/ss/ed/ia
index af2abe74f54440199fdccfeccdd2de085ec7a203..aadf2dd6f84e15c919c9020e9271c3e8f7427a35 100644 (file)
 #define RESUME_HOST             RESUME_FLAG_HOST
 #define RESUME_HOST_NV          (RESUME_FLAG_HOST|RESUME_FLAG_NV)
 
+#define KVM_GUEST_MODE_NONE    0
+#define KVM_GUEST_MODE_GUEST   1
+#define KVM_GUEST_MODE_SKIP    2
+
+#define KVM_INST_FETCH_FAILED  -1
+
 #endif /* __POWERPC_KVM_ASM_H__ */
index 74b7369770d05b3f93a9c233e3815aef3ad51300..db7db0a96967ad8dc4c2eb2a5c2438d878187e73 100644 (file)
@@ -22,7 +22,7 @@
 
 #include <linux/types.h>
 #include <linux/kvm_host.h>
-#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s_64_asm.h>
 
 struct kvmppc_slb {
        u64 esid;
@@ -33,7 +33,8 @@ struct kvmppc_slb {
        bool Ks;
        bool Kp;
        bool nx;
-       bool large;
+       bool large;     /* PTEs are 16MB */
+       bool tb;        /* 1TB segment */
        bool class;
 };
 
@@ -69,6 +70,7 @@ struct kvmppc_sid_map {
 
 struct kvmppc_vcpu_book3s {
        struct kvm_vcpu vcpu;
+       struct kvmppc_book3s_shadow_vcpu shadow_vcpu;
        struct kvmppc_sid_map sid_map[SID_MAP_NUM];
        struct kvmppc_slb slb[64];
        struct {
@@ -89,6 +91,7 @@ struct kvmppc_vcpu_book3s {
        u64 vsid_next;
        u64 vsid_max;
        int context_id;
+       ulong prog_flags; /* flags to inject when giving a 700 trap */
 };
 
 #define CONTEXT_HOST           0
@@ -119,6 +122,10 @@ extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat,
 
 extern u32 kvmppc_trampoline_lowmem;
 extern u32 kvmppc_trampoline_enter;
+extern void kvmppc_rmcall(ulong srr0, ulong srr1);
+extern void kvmppc_load_up_fpu(void);
+extern void kvmppc_load_up_altivec(void);
+extern void kvmppc_load_up_vsx(void);
 
 static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu)
 {
index 2e06ee8184ef8caef068ae2fc4496cb3404cc13c..183461b484076e25fd5ca714cce4426b82b0e1e6 100644 (file)
@@ -20,6 +20,8 @@
 #ifndef __ASM_KVM_BOOK3S_ASM_H__
 #define __ASM_KVM_BOOK3S_ASM_H__
 
+#ifdef __ASSEMBLY__
+
 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
 
 #include <asm/kvm_asm.h>
@@ -55,4 +57,20 @@ kvmppc_resume_\intno:
 
 #endif /* CONFIG_KVM_BOOK3S_64_HANDLER */
 
+#else  /*__ASSEMBLY__ */
+
+struct kvmppc_book3s_shadow_vcpu {
+       ulong gpr[14];
+       u32 cr;
+       u32 xer;
+       ulong host_r1;
+       ulong host_r2;
+       ulong handler;
+       ulong scratch0;
+       ulong scratch1;
+       ulong vmhandler;
+};
+
+#endif /*__ASSEMBLY__ */
+
 #endif /* __ASM_KVM_BOOK3S_ASM_H__ */
index 9d497ce497267fa8958add05cfa0aa5d211af903..7fea26fffb25f692f50f71d8e4b3d858d9478aad 100644 (file)
@@ -52,9 +52,12 @@ struct kvmppc_vcpu_e500 {
        u32 mas5;
        u32 mas6;
        u32 mas7;
+       u32 l1csr0;
        u32 l1csr1;
        u32 hid0;
        u32 hid1;
+       u32 tlb0cfg;
+       u32 tlb1cfg;
 
        struct kvm_vcpu vcpu;
 };
index 1201f62d0d73f23e2e75e9b9622499ca259359c7..5e5bae7e152fe4a084eef9f61cf72abfe728e399 100644 (file)
@@ -167,23 +167,40 @@ struct kvm_vcpu_arch {
        ulong trampoline_lowmem;
        ulong trampoline_enter;
        ulong highmem_handler;
+       ulong rmcall;
        ulong host_paca_phys;
        struct kvmppc_mmu mmu;
 #endif
 
-       u64 fpr[32];
        ulong gpr[32];
 
+       u64 fpr[32];
+       u32 fpscr;
+
+#ifdef CONFIG_ALTIVEC
+       vector128 vr[32];
+       vector128 vscr;
+#endif
+
+#ifdef CONFIG_VSX
+       u64 vsr[32];
+#endif
+
        ulong pc;
-       u32 cr;
        ulong ctr;
        ulong lr;
+
+#ifdef CONFIG_BOOKE
        ulong xer;
+       u32 cr;
+#endif
 
        ulong msr;
 #ifdef CONFIG_PPC64
        ulong shadow_msr;
+       ulong shadow_srr1;
        ulong hflags;
+       ulong guest_owned_ext;
 #endif
        u32 mmucr;
        ulong sprg0;
@@ -242,6 +259,8 @@ struct kvm_vcpu_arch {
 #endif
        ulong fault_dear;
        ulong fault_esr;
+       ulong queued_dear;
+       ulong queued_esr;
        gpa_t paddr_accessed;
 
        u8 io_gpr; /* GPR used as IO source/target */
index 269ee46ab0285701f84411748a51e6f14c5b1018..e2642829e4350b5d9c0f62a17c11b03a06f75c58 100644 (file)
@@ -28,6 +28,9 @@
 #include <linux/types.h>
 #include <linux/kvm_types.h>
 #include <linux/kvm_host.h>
+#ifdef CONFIG_PPC_BOOK3S
+#include <asm/kvm_book3s.h>
+#endif
 
 enum emulation_result {
        EMULATE_DONE,         /* no further processing */
@@ -80,8 +83,9 @@ extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu);
 
 extern void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu);
 extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu);
-extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags);
 extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu);
 extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
                                        struct kvm_interrupt *irq);
 
@@ -95,4 +99,81 @@ extern void kvmppc_booke_exit(void);
 
 extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu);
 
+#ifdef CONFIG_PPC_BOOK3S
+
+/* We assume we're always acting on the current vcpu */
+
+static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
+{
+       if ( num < 14 ) {
+               get_paca()->shadow_vcpu.gpr[num] = val;
+               to_book3s(vcpu)->shadow_vcpu.gpr[num] = val;
+       } else
+               vcpu->arch.gpr[num] = val;
+}
+
+static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
+{
+       if ( num < 14 )
+               return get_paca()->shadow_vcpu.gpr[num];
+       else
+               return vcpu->arch.gpr[num];
+}
+
+static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
+{
+       get_paca()->shadow_vcpu.cr = val;
+       to_book3s(vcpu)->shadow_vcpu.cr = val;
+}
+
+static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
+{
+       return get_paca()->shadow_vcpu.cr;
+}
+
+static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
+{
+       get_paca()->shadow_vcpu.xer = val;
+       to_book3s(vcpu)->shadow_vcpu.xer = val;
+}
+
+static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu)
+{
+       return get_paca()->shadow_vcpu.xer;
+}
+
+#else
+
+static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
+{
+       vcpu->arch.gpr[num] = val;
+}
+
+static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
+{
+       return vcpu->arch.gpr[num];
+}
+
+static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
+{
+       vcpu->arch.cr = val;
+}
+
+static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
+{
+       return vcpu->arch.cr;
+}
+
+static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
+{
+       vcpu->arch.xer = val;
+}
+
+static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu)
+{
+       return vcpu->arch.xer;
+}
+
+#endif
+
 #endif /* __POWERPC_KVM_PPC_H__ */
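
With these accessors, emulation code stops touching vcpu->arch.gpr[] directly; on Book3S the first 14 GPRs, CR and XER live in the PACA shadow vcpu so the low-level entry/exit code can reach them. A short usage sketch mirroring the 44x emulation changes later in this merge:

/* Usage sketch mirroring the emulation changes in this merge. */
ulong ea = kvmppc_get_gpr(vcpu, rb);
if (ra)
        ea += kvmppc_get_gpr(vcpu, ra);
kvmppc_set_gpr(vcpu, rt, gtlb_index);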
index 5e9b4ef71415c70b207769b8a9caa1ff486eea71..d8a693109c8294808c226ec57b8d75e4ed57a92d 100644 (file)
@@ -19,6 +19,9 @@
 #include <asm/mmu.h>
 #include <asm/page.h>
 #include <asm/exception-64e.h>
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+#include <asm/kvm_book3s_64_asm.h>
+#endif
 
 register struct paca_struct *local_paca asm("r13");
 
@@ -135,6 +138,8 @@ struct paca_struct {
                u64     esid;
                u64     vsid;
        } kvm_slb[64];                  /* guest SLB */
+       /* We use this to store guest state in */
+       struct kvmppc_book3s_shadow_vcpu shadow_vcpu;
        u8 kvm_slb_max;                 /* highest used guest slb entry */
        u8 kvm_in_guest;                /* are we inside the guest? */
 #endif
index bc8dd53f718a1b201a16e91f4864bb221f5acb25..5572e86223f4f5afcf0634063d3279391fb1fa10 100644 (file)
 #define   SRR1_WAKEMT          0x00280000 /* mtctrl */
 #define   SRR1_WAKEDEC         0x00180000 /* Decrementer interrupt */
 #define   SRR1_WAKETHERM       0x00100000 /* Thermal management interrupt */
+#define   SRR1_PROGFPE         0x00100000 /* Floating Point Enabled */
+#define   SRR1_PROGPRIV                0x00040000 /* Privileged instruction */
+#define   SRR1_PROGTRAP                0x00020000 /* Trap */
+#define   SRR1_PROGADDR                0x00010000 /* SRR0 contains subsequent addr */
 #define SPRN_HSRR0     0x13A   /* Save/Restore Register 0 */
 #define SPRN_HSRR1     0x13B   /* Save/Restore Register 1 */
 
index a6c2b63227b32aa8b891ce33b5c3b8acc8787be0..957ceb7059c57a8a2bd7c5ffd9330a361baaf160 100644 (file)
@@ -194,6 +194,30 @@ int main(void)
        DEFINE(PACA_KVM_IN_GUEST, offsetof(struct paca_struct, kvm_in_guest));
        DEFINE(PACA_KVM_SLB, offsetof(struct paca_struct, kvm_slb));
        DEFINE(PACA_KVM_SLB_MAX, offsetof(struct paca_struct, kvm_slb_max));
+       DEFINE(PACA_KVM_CR, offsetof(struct paca_struct, shadow_vcpu.cr));
+       DEFINE(PACA_KVM_XER, offsetof(struct paca_struct, shadow_vcpu.xer));
+       DEFINE(PACA_KVM_R0, offsetof(struct paca_struct, shadow_vcpu.gpr[0]));
+       DEFINE(PACA_KVM_R1, offsetof(struct paca_struct, shadow_vcpu.gpr[1]));
+       DEFINE(PACA_KVM_R2, offsetof(struct paca_struct, shadow_vcpu.gpr[2]));
+       DEFINE(PACA_KVM_R3, offsetof(struct paca_struct, shadow_vcpu.gpr[3]));
+       DEFINE(PACA_KVM_R4, offsetof(struct paca_struct, shadow_vcpu.gpr[4]));
+       DEFINE(PACA_KVM_R5, offsetof(struct paca_struct, shadow_vcpu.gpr[5]));
+       DEFINE(PACA_KVM_R6, offsetof(struct paca_struct, shadow_vcpu.gpr[6]));
+       DEFINE(PACA_KVM_R7, offsetof(struct paca_struct, shadow_vcpu.gpr[7]));
+       DEFINE(PACA_KVM_R8, offsetof(struct paca_struct, shadow_vcpu.gpr[8]));
+       DEFINE(PACA_KVM_R9, offsetof(struct paca_struct, shadow_vcpu.gpr[9]));
+       DEFINE(PACA_KVM_R10, offsetof(struct paca_struct, shadow_vcpu.gpr[10]));
+       DEFINE(PACA_KVM_R11, offsetof(struct paca_struct, shadow_vcpu.gpr[11]));
+       DEFINE(PACA_KVM_R12, offsetof(struct paca_struct, shadow_vcpu.gpr[12]));
+       DEFINE(PACA_KVM_R13, offsetof(struct paca_struct, shadow_vcpu.gpr[13]));
+       DEFINE(PACA_KVM_HOST_R1, offsetof(struct paca_struct, shadow_vcpu.host_r1));
+       DEFINE(PACA_KVM_HOST_R2, offsetof(struct paca_struct, shadow_vcpu.host_r2));
+       DEFINE(PACA_KVM_VMHANDLER, offsetof(struct paca_struct,
+                                           shadow_vcpu.vmhandler));
+       DEFINE(PACA_KVM_SCRATCH0, offsetof(struct paca_struct,
+                                          shadow_vcpu.scratch0));
+       DEFINE(PACA_KVM_SCRATCH1, offsetof(struct paca_struct,
+                                          shadow_vcpu.scratch1));
 #endif
 #endif /* CONFIG_PPC64 */
 
@@ -389,8 +413,6 @@ int main(void)
        DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
        DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
        DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr));
-       DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
-       DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer));
        DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr));
        DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc));
        DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.msr));
@@ -411,11 +433,16 @@ int main(void)
        DEFINE(VCPU_HOST_R2, offsetof(struct kvm_vcpu, arch.host_r2));
        DEFINE(VCPU_HOST_MSR, offsetof(struct kvm_vcpu, arch.host_msr));
        DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr));
+       DEFINE(VCPU_SHADOW_SRR1, offsetof(struct kvm_vcpu, arch.shadow_srr1));
        DEFINE(VCPU_TRAMPOLINE_LOWMEM, offsetof(struct kvm_vcpu, arch.trampoline_lowmem));
        DEFINE(VCPU_TRAMPOLINE_ENTER, offsetof(struct kvm_vcpu, arch.trampoline_enter));
        DEFINE(VCPU_HIGHMEM_HANDLER, offsetof(struct kvm_vcpu, arch.highmem_handler));
+       DEFINE(VCPU_RMCALL, offsetof(struct kvm_vcpu, arch.rmcall));
        DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags));
-#endif
+#else
+       DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
+       DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer));
+#endif /* CONFIG_PPC64 */
 #endif
 #ifdef CONFIG_44x
        DEFINE(PGD_T_LOG2, PGD_T_LOG2);
index 425451453e96d9cd7e14c6f20fdc6c852ec2f235..ab3e392ac63c41fda474ce4857660bbd38e459bd 100644 (file)
@@ -107,6 +107,7 @@ EXPORT_SYMBOL(giveup_altivec);
 #endif /* CONFIG_ALTIVEC */
 #ifdef CONFIG_VSX
 EXPORT_SYMBOL(giveup_vsx);
+EXPORT_SYMBOL_GPL(__giveup_vsx);
 #endif /* CONFIG_VSX */
 #ifdef CONFIG_SPE
 EXPORT_SYMBOL(giveup_spe);
index 61af58fceceee8a6fcb87c9f0e2f1e55e9a3dfb5..65ea083a5b27bda5ba4cacb875894ec127513bde 100644 (file)
@@ -65,13 +65,14 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
                         */
                        switch (dcrn) {
                        case DCRN_CPR0_CONFIG_ADDR:
-                               vcpu->arch.gpr[rt] = vcpu->arch.cpr0_cfgaddr;
+                               kvmppc_set_gpr(vcpu, rt, vcpu->arch.cpr0_cfgaddr);
                                break;
                        case DCRN_CPR0_CONFIG_DATA:
                                local_irq_disable();
                                mtdcr(DCRN_CPR0_CONFIG_ADDR,
                                          vcpu->arch.cpr0_cfgaddr);
-                               vcpu->arch.gpr[rt] = mfdcr(DCRN_CPR0_CONFIG_DATA);
+                               kvmppc_set_gpr(vcpu, rt,
+                                              mfdcr(DCRN_CPR0_CONFIG_DATA));
                                local_irq_enable();
                                break;
                        default:
@@ -93,11 +94,11 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
                        /* emulate some access in kernel */
                        switch (dcrn) {
                        case DCRN_CPR0_CONFIG_ADDR:
-                               vcpu->arch.cpr0_cfgaddr = vcpu->arch.gpr[rs];
+                               vcpu->arch.cpr0_cfgaddr = kvmppc_get_gpr(vcpu, rs);
                                break;
                        default:
                                run->dcr.dcrn = dcrn;
-                               run->dcr.data = vcpu->arch.gpr[rs];
+                               run->dcr.data = kvmppc_get_gpr(vcpu, rs);
                                run->dcr.is_write = 1;
                                vcpu->arch.dcr_needed = 1;
                                kvmppc_account_exit(vcpu, DCR_EXITS);
@@ -146,13 +147,13 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
 
        switch (sprn) {
        case SPRN_PID:
-               kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break;
+               kvmppc_set_pid(vcpu, kvmppc_get_gpr(vcpu, rs)); break;
        case SPRN_MMUCR:
-               vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break;
+               vcpu->arch.mmucr = kvmppc_get_gpr(vcpu, rs); break;
        case SPRN_CCR0:
-               vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break;
+               vcpu->arch.ccr0 = kvmppc_get_gpr(vcpu, rs); break;
        case SPRN_CCR1:
-               vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break;
+               vcpu->arch.ccr1 = kvmppc_get_gpr(vcpu, rs); break;
        default:
                emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs);
        }
@@ -167,13 +168,13 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
 
        switch (sprn) {
        case SPRN_PID:
-               vcpu->arch.gpr[rt] = vcpu->arch.pid; break;
+               kvmppc_set_gpr(vcpu, rt, vcpu->arch.pid); break;
        case SPRN_MMUCR:
-               vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break;
+               kvmppc_set_gpr(vcpu, rt, vcpu->arch.mmucr); break;
        case SPRN_CCR0:
-               vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break;
+               kvmppc_set_gpr(vcpu, rt, vcpu->arch.ccr0); break;
        case SPRN_CCR1:
-               vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break;
+               kvmppc_set_gpr(vcpu, rt, vcpu->arch.ccr1); break;
        default:
                emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt);
        }
index ff3cb63b8117f3e1285be25a0dce34a7194f1328..2570fcc7665ddd9376f2db851371efef80fa2045 100644 (file)
@@ -439,7 +439,7 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
        struct kvmppc_44x_tlbe *tlbe;
        unsigned int gtlb_index;
 
-       gtlb_index = vcpu->arch.gpr[ra];
+       gtlb_index = kvmppc_get_gpr(vcpu, ra);
        if (gtlb_index > KVM44x_GUEST_TLB_SIZE) {
                printk("%s: index %d\n", __func__, gtlb_index);
                kvmppc_dump_vcpu(vcpu);
@@ -455,15 +455,15 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
        switch (ws) {
        case PPC44x_TLB_PAGEID:
                tlbe->tid = get_mmucr_stid(vcpu);
-               tlbe->word0 = vcpu->arch.gpr[rs];
+               tlbe->word0 = kvmppc_get_gpr(vcpu, rs);
                break;
 
        case PPC44x_TLB_XLAT:
-               tlbe->word1 = vcpu->arch.gpr[rs];
+               tlbe->word1 = kvmppc_get_gpr(vcpu, rs);
                break;
 
        case PPC44x_TLB_ATTRIB:
-               tlbe->word2 = vcpu->arch.gpr[rs];
+               tlbe->word2 = kvmppc_get_gpr(vcpu, rs);
                break;
 
        default:
@@ -500,18 +500,20 @@ int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, u8 rc)
        unsigned int as = get_mmucr_sts(vcpu);
        unsigned int pid = get_mmucr_stid(vcpu);
 
-       ea = vcpu->arch.gpr[rb];
+       ea = kvmppc_get_gpr(vcpu, rb);
        if (ra)
-               ea += vcpu->arch.gpr[ra];
+               ea += kvmppc_get_gpr(vcpu, ra);
 
        gtlb_index = kvmppc_44x_tlb_index(vcpu, ea, pid, as);
        if (rc) {
+               u32 cr = kvmppc_get_cr(vcpu);
+
                if (gtlb_index < 0)
-                       vcpu->arch.cr &= ~0x20000000;
+                       kvmppc_set_cr(vcpu, cr & ~0x20000000);
                else
-                       vcpu->arch.cr |= 0x20000000;
+                       kvmppc_set_cr(vcpu, cr | 0x20000000);
        }
-       vcpu->arch.gpr[rt] = gtlb_index;
+       kvmppc_set_gpr(vcpu, rt, gtlb_index);
 
        kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS);
        return EMULATE_DONE;
index fe037fdaf1b392639a33030efd6b8a2190dc1ad4..60624cc9f4d4067a2c891e862a83b7c1a83f2de0 100644 (file)
@@ -20,6 +20,7 @@ config KVM
        bool
        select PREEMPT_NOTIFIERS
        select ANON_INODES
+       select KVM_MMIO
 
 config KVM_BOOK3S_64_HANDLER
        bool
index 3e294bd9b8c6d998ef262d013de4e02a3468fbca..9a271f0929c727c100fb0cda64dff969f020f49a 100644 (file)
 
 /* #define EXIT_DEBUG */
 /* #define EXIT_DEBUG_SIMPLE */
+/* #define DEBUG_EXT */
 
-/* Without AGGRESSIVE_DEC we only fire off a DEC interrupt when DEC turns 0.
- * When set, we retrigger a DEC interrupt after that if DEC <= 0.
- * PPC32 Linux runs faster without AGGRESSIVE_DEC, PPC64 Linux requires it. */
-
-/* #define AGGRESSIVE_DEC */
+static void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr);
 
 struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "exits",       VCPU_STAT(sum_exits) },
@@ -72,16 +69,24 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
 void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
        memcpy(get_paca()->kvm_slb, to_book3s(vcpu)->slb_shadow, sizeof(get_paca()->kvm_slb));
+       memcpy(&get_paca()->shadow_vcpu, &to_book3s(vcpu)->shadow_vcpu,
+              sizeof(get_paca()->shadow_vcpu));
        get_paca()->kvm_slb_max = to_book3s(vcpu)->slb_shadow_max;
 }
 
 void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
 {
        memcpy(to_book3s(vcpu)->slb_shadow, get_paca()->kvm_slb, sizeof(get_paca()->kvm_slb));
+       memcpy(&to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu,
+              sizeof(get_paca()->shadow_vcpu));
        to_book3s(vcpu)->slb_shadow_max = get_paca()->kvm_slb_max;
+
+       kvmppc_giveup_ext(vcpu, MSR_FP);
+       kvmppc_giveup_ext(vcpu, MSR_VEC);
+       kvmppc_giveup_ext(vcpu, MSR_VSX);
 }
 
-#if defined(AGGRESSIVE_DEC) || defined(EXIT_DEBUG)
+#if defined(EXIT_DEBUG)
 static u32 kvmppc_get_dec(struct kvm_vcpu *vcpu)
 {
        u64 jd = mftb() - vcpu->arch.dec_jiffies;
@@ -89,6 +94,23 @@ static u32 kvmppc_get_dec(struct kvm_vcpu *vcpu)
 }
 #endif
 
+static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
+{
+       vcpu->arch.shadow_msr = vcpu->arch.msr;
+       /* Guest MSR values */
+       vcpu->arch.shadow_msr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE |
+                                MSR_BE | MSR_DE;
+       /* Process MSR values */
+       vcpu->arch.shadow_msr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR |
+                                MSR_EE;
+       /* External providers the guest reserved */
+       vcpu->arch.shadow_msr |= (vcpu->arch.msr & vcpu->arch.guest_owned_ext);
+       /* 64-bit Process MSR values */
+#ifdef CONFIG_PPC_BOOK3S_64
+       vcpu->arch.shadow_msr |= MSR_ISF | MSR_HV;
+#endif
+}
+
 void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
 {
        ulong old_msr = vcpu->arch.msr;
@@ -96,12 +118,10 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
 #ifdef EXIT_DEBUG
        printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr);
 #endif
+
        msr &= to_book3s(vcpu)->msr_mask;
        vcpu->arch.msr = msr;
-       vcpu->arch.shadow_msr = msr | MSR_USER32;
-       vcpu->arch.shadow_msr &= ( MSR_VEC | MSR_VSX | MSR_FP | MSR_FE0 |
-                                  MSR_USER64 | MSR_SE | MSR_BE | MSR_DE |
-                                  MSR_FE1);
+       kvmppc_recalc_shadow_msr(vcpu);
 
        if (msr & (MSR_WE|MSR_POW)) {
                if (!vcpu->arch.pending_exceptions) {
@@ -125,11 +145,10 @@ void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags)
        vcpu->arch.mmu.reset_msr(vcpu);
 }
 
-void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec)
+static int kvmppc_book3s_vec2irqprio(unsigned int vec)
 {
        unsigned int prio;
 
-       vcpu->stat.queue_intr++;
        switch (vec) {
        case 0x100: prio = BOOK3S_IRQPRIO_SYSTEM_RESET;         break;
        case 0x200: prio = BOOK3S_IRQPRIO_MACHINE_CHECK;        break;
@@ -149,15 +168,31 @@ void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec)
        default:    prio = BOOK3S_IRQPRIO_MAX;                  break;
        }
 
-       set_bit(prio, &vcpu->arch.pending_exceptions);
+       return prio;
+}
+
+static void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
+                                         unsigned int vec)
+{
+       clear_bit(kvmppc_book3s_vec2irqprio(vec),
+                 &vcpu->arch.pending_exceptions);
+}
+
+void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec)
+{
+       vcpu->stat.queue_intr++;
+
+       set_bit(kvmppc_book3s_vec2irqprio(vec),
+               &vcpu->arch.pending_exceptions);
 #ifdef EXIT_DEBUG
        printk(KERN_INFO "Queueing interrupt %x\n", vec);
 #endif
 }
 
 
-void kvmppc_core_queue_program(struct kvm_vcpu *vcpu)
+void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags)
 {
+       to_book3s(vcpu)->prog_flags = flags;
        kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_PROGRAM);
 }
 
@@ -171,6 +206,11 @@ int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu)
        return test_bit(BOOK3S_INTERRUPT_DECREMENTER >> 7, &vcpu->arch.pending_exceptions);
 }
 
+void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu)
+{
+       kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER);
+}
+
 void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
                                 struct kvm_interrupt *irq)
 {
@@ -181,6 +221,7 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
 {
        int deliver = 1;
        int vec = 0;
+       ulong flags = 0ULL;
 
        switch (priority) {
        case BOOK3S_IRQPRIO_DECREMENTER:
@@ -214,6 +255,7 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
                break;
        case BOOK3S_IRQPRIO_PROGRAM:
                vec = BOOK3S_INTERRUPT_PROGRAM;
+               flags = to_book3s(vcpu)->prog_flags;
                break;
        case BOOK3S_IRQPRIO_VSX:
                vec = BOOK3S_INTERRUPT_VSX;
@@ -244,7 +286,7 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
 #endif
 
        if (deliver)
-               kvmppc_inject_interrupt(vcpu, vec, 0ULL);
+               kvmppc_inject_interrupt(vcpu, vec, flags);
 
        return deliver;
 }
@@ -254,21 +296,15 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
        unsigned long *pending = &vcpu->arch.pending_exceptions;
        unsigned int priority;
 
-       /* XXX be more clever here - no need to mftb() on every entry */
-       /* Issue DEC again if it's still active */
-#ifdef AGGRESSIVE_DEC
-       if (vcpu->arch.msr & MSR_EE)
-               if (kvmppc_get_dec(vcpu) & 0x80000000)
-                       kvmppc_core_queue_dec(vcpu);
-#endif
-
 #ifdef EXIT_DEBUG
        if (vcpu->arch.pending_exceptions)
                printk(KERN_EMERG "KVM: Check pending: %lx\n", vcpu->arch.pending_exceptions);
 #endif
        priority = __ffs(*pending);
        while (priority <= (sizeof(unsigned int) * 8)) {
-               if (kvmppc_book3s_irqprio_deliver(vcpu, priority)) {
+               if (kvmppc_book3s_irqprio_deliver(vcpu, priority) &&
+                   (priority != BOOK3S_IRQPRIO_DECREMENTER)) {
+                       /* DEC interrupts get cleared by mtdec */
                        clear_bit(priority, &vcpu->arch.pending_exceptions);
                        break;
                }
@@ -503,14 +539,14 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
                /* Page not found in guest PTE entries */
                vcpu->arch.dear = vcpu->arch.fault_dear;
                to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr;
-               vcpu->arch.msr |= (vcpu->arch.shadow_msr & 0x00000000f8000000ULL);
+               vcpu->arch.msr |= (vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL);
                kvmppc_book3s_queue_irqprio(vcpu, vec);
        } else if (page_found == -EPERM) {
                /* Storage protection */
                vcpu->arch.dear = vcpu->arch.fault_dear;
                to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr & ~DSISR_NOHPTE;
                to_book3s(vcpu)->dsisr |= DSISR_PROTFAULT;
-               vcpu->arch.msr |= (vcpu->arch.shadow_msr & 0x00000000f8000000ULL);
+               vcpu->arch.msr |= (vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL);
                kvmppc_book3s_queue_irqprio(vcpu, vec);
        } else if (page_found == -EINVAL) {
                /* Page not found in guest SLB */
@@ -532,13 +568,122 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
                r = kvmppc_emulate_mmio(run, vcpu);
                if ( r == RESUME_HOST_NV )
                        r = RESUME_HOST;
-               if ( r == RESUME_GUEST_NV )
-                       r = RESUME_GUEST;
        }
 
        return r;
 }
 
+static inline int get_fpr_index(int i)
+{
+#ifdef CONFIG_VSX
+       i *= 2;
+#endif
+       return i;
+}
+
+/* Give up external provider (FPU, Altivec, VSX) */
+static void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
+{
+       struct thread_struct *t = &current->thread;
+       u64 *vcpu_fpr = vcpu->arch.fpr;
+       u64 *vcpu_vsx = vcpu->arch.vsr;
+       u64 *thread_fpr = (u64*)t->fpr;
+       int i;
+
+       if (!(vcpu->arch.guest_owned_ext & msr))
+               return;
+
+#ifdef DEBUG_EXT
+       printk(KERN_INFO "Giving up ext 0x%lx\n", msr);
+#endif
+
+       switch (msr) {
+       case MSR_FP:
+               giveup_fpu(current);
+               for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
+                       vcpu_fpr[i] = thread_fpr[get_fpr_index(i)];
+
+               vcpu->arch.fpscr = t->fpscr.val;
+               break;
+       case MSR_VEC:
+#ifdef CONFIG_ALTIVEC
+               giveup_altivec(current);
+               memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr));
+               vcpu->arch.vscr = t->vscr;
+#endif
+               break;
+       case MSR_VSX:
+#ifdef CONFIG_VSX
+               __giveup_vsx(current);
+               for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++)
+                       vcpu_vsx[i] = thread_fpr[get_fpr_index(i) + 1];
+#endif
+               break;
+       default:
+               BUG();
+       }
+
+       vcpu->arch.guest_owned_ext &= ~msr;
+       current->thread.regs->msr &= ~msr;
+       kvmppc_recalc_shadow_msr(vcpu);
+}
+
+/* Handle external providers (FPU, Altivec, VSX) */
+static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
+                            ulong msr)
+{
+       struct thread_struct *t = &current->thread;
+       u64 *vcpu_fpr = vcpu->arch.fpr;
+       u64 *vcpu_vsx = vcpu->arch.vsr;
+       u64 *thread_fpr = (u64*)t->fpr;
+       int i;
+
+       if (!(vcpu->arch.msr & msr)) {
+               kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+               return RESUME_GUEST;
+       }
+
+#ifdef DEBUG_EXT
+       printk(KERN_INFO "Loading up ext 0x%lx\n", msr);
+#endif
+
+       current->thread.regs->msr |= msr;
+
+       switch (msr) {
+       case MSR_FP:
+               for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
+                       thread_fpr[get_fpr_index(i)] = vcpu_fpr[i];
+
+               t->fpscr.val = vcpu->arch.fpscr;
+               t->fpexc_mode = 0;
+               kvmppc_load_up_fpu();
+               break;
+       case MSR_VEC:
+#ifdef CONFIG_ALTIVEC
+               memcpy(t->vr, vcpu->arch.vr, sizeof(vcpu->arch.vr));
+               t->vscr = vcpu->arch.vscr;
+               t->vrsave = -1;
+               kvmppc_load_up_altivec();
+#endif
+               break;
+       case MSR_VSX:
+#ifdef CONFIG_VSX
+               for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++)
+                       thread_fpr[get_fpr_index(i) + 1] = vcpu_vsx[i];
+               kvmppc_load_up_vsx();
+#endif
+               break;
+       default:
+               BUG();
+       }
+
+       vcpu->arch.guest_owned_ext |= msr;
+
+       kvmppc_recalc_shadow_msr(vcpu);
+
+       return RESUME_GUEST;
+}
+
 int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
                        unsigned int exit_nr)
 {
@@ -563,7 +708,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
        case BOOK3S_INTERRUPT_INST_STORAGE:
                vcpu->stat.pf_instruc++;
                /* only care about PTEG not found errors, but leave NX alone */
-               if (vcpu->arch.shadow_msr & 0x40000000) {
+               if (vcpu->arch.shadow_srr1 & 0x40000000) {
                        r = kvmppc_handle_pagefault(run, vcpu, vcpu->arch.pc, exit_nr);
                        vcpu->stat.sp_instruc++;
                } else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
@@ -575,7 +720,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
                         */
                        kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL);
                } else {
-                       vcpu->arch.msr |= (vcpu->arch.shadow_msr & 0x58000000);
+                       vcpu->arch.msr |= vcpu->arch.shadow_srr1 & 0x58000000;
                        kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
                        kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL);
                        r = RESUME_GUEST;
@@ -621,6 +766,9 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
        case BOOK3S_INTERRUPT_PROGRAM:
        {
                enum emulation_result er;
+               ulong flags;
+
+               flags = vcpu->arch.shadow_srr1 & 0x1f0000ull;
 
                if (vcpu->arch.msr & MSR_PR) {
 #ifdef EXIT_DEBUG
@@ -628,7 +776,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 #endif
                        if ((vcpu->arch.last_inst & 0xff0007ff) !=
                            (INS_DCBZ & 0xfffffff7)) {
-                               kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+                               kvmppc_core_queue_program(vcpu, flags);
                                r = RESUME_GUEST;
                                break;
                        }
@@ -638,12 +786,12 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
                er = kvmppc_emulate_instruction(run, vcpu);
                switch (er) {
                case EMULATE_DONE:
-                       r = RESUME_GUEST;
+                       r = RESUME_GUEST_NV;
                        break;
                case EMULATE_FAIL:
                        printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
                               __func__, vcpu->arch.pc, vcpu->arch.last_inst);
-                       kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+                       kvmppc_core_queue_program(vcpu, flags);
                        r = RESUME_GUEST;
                        break;
                default:
@@ -653,23 +801,30 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
        }
        case BOOK3S_INTERRUPT_SYSCALL:
 #ifdef EXIT_DEBUG
-               printk(KERN_INFO "Syscall Nr %d\n", (int)vcpu->arch.gpr[0]);
+               printk(KERN_INFO "Syscall Nr %d\n", (int)kvmppc_get_gpr(vcpu, 0));
 #endif
                vcpu->stat.syscall_exits++;
                kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
                r = RESUME_GUEST;
                break;
-       case BOOK3S_INTERRUPT_MACHINE_CHECK:
        case BOOK3S_INTERRUPT_FP_UNAVAIL:
-       case BOOK3S_INTERRUPT_TRACE:
+               r = kvmppc_handle_ext(vcpu, exit_nr, MSR_FP);
+               break;
        case BOOK3S_INTERRUPT_ALTIVEC:
+               r = kvmppc_handle_ext(vcpu, exit_nr, MSR_VEC);
+               break;
        case BOOK3S_INTERRUPT_VSX:
+               r = kvmppc_handle_ext(vcpu, exit_nr, MSR_VSX);
+               break;
+       case BOOK3S_INTERRUPT_MACHINE_CHECK:
+       case BOOK3S_INTERRUPT_TRACE:
                kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
                r = RESUME_GUEST;
                break;
        default:
                /* Ugh - bork here! What did we get? */
-               printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n", exit_nr, vcpu->arch.pc, vcpu->arch.shadow_msr);
+               printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n",
+                       exit_nr, vcpu->arch.pc, vcpu->arch.shadow_srr1);
                r = RESUME_HOST;
                BUG();
                break;
@@ -712,10 +867,10 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
        int i;
 
        regs->pc = vcpu->arch.pc;
-       regs->cr = vcpu->arch.cr;
+       regs->cr = kvmppc_get_cr(vcpu);
        regs->ctr = vcpu->arch.ctr;
        regs->lr = vcpu->arch.lr;
-       regs->xer = vcpu->arch.xer;
+       regs->xer = kvmppc_get_xer(vcpu);
        regs->msr = vcpu->arch.msr;
        regs->srr0 = vcpu->arch.srr0;
        regs->srr1 = vcpu->arch.srr1;
@@ -729,7 +884,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
        regs->sprg7 = vcpu->arch.sprg6;
 
        for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
-               regs->gpr[i] = vcpu->arch.gpr[i];
+               regs->gpr[i] = kvmppc_get_gpr(vcpu, i);
 
        return 0;
 }
@@ -739,10 +894,10 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
        int i;
 
        vcpu->arch.pc = regs->pc;
-       vcpu->arch.cr = regs->cr;
+       kvmppc_set_cr(vcpu, regs->cr);
        vcpu->arch.ctr = regs->ctr;
        vcpu->arch.lr = regs->lr;
-       vcpu->arch.xer = regs->xer;
+       kvmppc_set_xer(vcpu, regs->xer);
        kvmppc_set_msr(vcpu, regs->msr);
        vcpu->arch.srr0 = regs->srr0;
        vcpu->arch.srr1 = regs->srr1;
@@ -754,8 +909,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
        vcpu->arch.sprg6 = regs->sprg5;
        vcpu->arch.sprg7 = regs->sprg6;
 
-       for (i = 0; i < ARRAY_SIZE(vcpu->arch.gpr); i++)
-               vcpu->arch.gpr[i] = regs->gpr[i];
+       for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
+               kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
 
        return 0;
 }
@@ -850,7 +1005,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
        int is_dirty = 0;
        int r, n;
 
-       down_write(&kvm->slots_lock);
+       mutex_lock(&kvm->slots_lock);
 
        r = kvm_get_dirty_log(kvm, log, &is_dirty);
        if (r)
@@ -858,7 +1013,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 
        /* If nothing is dirty, don't bother messing with page tables. */
        if (is_dirty) {
-               memslot = &kvm->memslots[log->slot];
+               memslot = &kvm->memslots->memslots[log->slot];
 
                ga = memslot->base_gfn << PAGE_SHIFT;
                ga_end = ga + (memslot->npages << PAGE_SHIFT);
@@ -872,7 +1027,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 
        r = 0;
 out:
-       up_write(&kvm->slots_lock);
+       mutex_unlock(&kvm->slots_lock);
        return r;
 }
 
@@ -910,6 +1065,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
        vcpu->arch.trampoline_lowmem = kvmppc_trampoline_lowmem;
        vcpu->arch.trampoline_enter = kvmppc_trampoline_enter;
        vcpu->arch.highmem_handler = (ulong)kvmppc_handler_highmem;
+       vcpu->arch.rmcall = *(ulong*)kvmppc_rmcall;
 
        vcpu->arch.shadow_msr = MSR_USER64;
 
@@ -943,6 +1099,10 @@ extern int __kvmppc_vcpu_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
 int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 {
        int ret;
+       struct thread_struct ext_bkp;
+       bool save_vec = current->thread.used_vr;
+       bool save_vsx = current->thread.used_vsr;
+       ulong ext_msr;
 
        /* No need to go into the guest when all we do is going out */
        if (signal_pending(current)) {
@@ -950,6 +1110,35 @@ int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
                return -EINTR;
        }
 
+       /* Save FPU state in stack */
+       if (current->thread.regs->msr & MSR_FP)
+               giveup_fpu(current);
+       memcpy(ext_bkp.fpr, current->thread.fpr, sizeof(current->thread.fpr));
+       ext_bkp.fpscr = current->thread.fpscr;
+       ext_bkp.fpexc_mode = current->thread.fpexc_mode;
+
+#ifdef CONFIG_ALTIVEC
+       /* Save Altivec state in stack */
+       if (save_vec) {
+               if (current->thread.regs->msr & MSR_VEC)
+                       giveup_altivec(current);
+               memcpy(ext_bkp.vr, current->thread.vr, sizeof(ext_bkp.vr));
+               ext_bkp.vscr = current->thread.vscr;
+               ext_bkp.vrsave = current->thread.vrsave;
+       }
+       ext_bkp.used_vr = current->thread.used_vr;
+#endif
+
+#ifdef CONFIG_VSX
+       /* Save VSX state in stack */
+       if (save_vsx && (current->thread.regs->msr & MSR_VSX))
+                       __giveup_vsx(current);
+       ext_bkp.used_vsr = current->thread.used_vsr;
+#endif
+
+       /* Remember the MSR with disabled extensions */
+       ext_msr = current->thread.regs->msr;
+
        /* XXX we get called with irq disabled - change that! */
        local_irq_enable();
 
@@ -957,6 +1146,32 @@ int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 
        local_irq_disable();
 
+       current->thread.regs->msr = ext_msr;
+
+       /* Make sure we save the guest FPU/Altivec/VSX state */
+       kvmppc_giveup_ext(vcpu, MSR_FP);
+       kvmppc_giveup_ext(vcpu, MSR_VEC);
+       kvmppc_giveup_ext(vcpu, MSR_VSX);
+
+       /* Restore FPU state from stack */
+       memcpy(current->thread.fpr, ext_bkp.fpr, sizeof(ext_bkp.fpr));
+       current->thread.fpscr = ext_bkp.fpscr;
+       current->thread.fpexc_mode = ext_bkp.fpexc_mode;
+
+#ifdef CONFIG_ALTIVEC
+       /* Restore Altivec state from stack */
+       if (save_vec && current->thread.used_vr) {
+               memcpy(current->thread.vr, ext_bkp.vr, sizeof(ext_bkp.vr));
+               current->thread.vscr = ext_bkp.vscr;
+               current->thread.vrsave = ext_bkp.vrsave;
+       }
+       current->thread.used_vr = ext_bkp.used_vr;
+#endif
+
+#ifdef CONFIG_VSX
+       current->thread.used_vsr = ext_bkp.used_vsr;
+#endif
+
        return ret;
 }
 
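
The entry/exit changes above bracket the guest run with a save/restore of the
host's lazily managed FPU, Altivec and VSX state: flush the live registers
into the thread struct, snapshot them on the stack, run the guest, reclaim
whatever the guest loaded, then put the snapshot back.  Reduced to its FPU
skeleton, using the helper names from the hunk and simplifying everything
else:

        int run_guest_with_ext_backup(struct kvm_run *run, struct kvm_vcpu *vcpu)
        {
                struct thread_struct ext_bkp;   /* stack copy of host FP state */
                ulong ext_msr;
                int ret;

                if (current->thread.regs->msr & MSR_FP)
                        giveup_fpu(current);    /* flush live FP regs to the thread */
                memcpy(ext_bkp.fpr, current->thread.fpr, sizeof(current->thread.fpr));

                ext_msr = current->thread.regs->msr;    /* MSR with FP/VEC/VSX off */
                ret = __kvmppc_vcpu_entry(run, vcpu);   /* run the guest */
                current->thread.regs->msr = ext_msr;

                kvmppc_giveup_ext(vcpu, MSR_FP);        /* pull guest FP state back */
                memcpy(current->thread.fpr, ext_bkp.fpr, sizeof(ext_bkp.fpr));
                return ret;
        }

The Altivec and VSX legs follow the same shape, guarded by whether the host
thread has used those units at all (used_vr / used_vsr).
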
index 1027eac6d474fea3ae3a94efe6aca91cd48a594c..2b0ee7e040c90d7870851ae46d564b1e17451ad7 100644 (file)
@@ -65,11 +65,11 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
        case 31:
                switch (get_xop(inst)) {
                case OP_31_XOP_MFMSR:
-                       vcpu->arch.gpr[get_rt(inst)] = vcpu->arch.msr;
+                       kvmppc_set_gpr(vcpu, get_rt(inst), vcpu->arch.msr);
                        break;
                case OP_31_XOP_MTMSRD:
                {
-                       ulong rs = vcpu->arch.gpr[get_rs(inst)];
+                       ulong rs = kvmppc_get_gpr(vcpu, get_rs(inst));
                        if (inst & 0x10000) {
                                vcpu->arch.msr &= ~(MSR_RI | MSR_EE);
                                vcpu->arch.msr |= rs & (MSR_RI | MSR_EE);
@@ -78,30 +78,30 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
                        break;
                }
                case OP_31_XOP_MTMSR:
-                       kvmppc_set_msr(vcpu, vcpu->arch.gpr[get_rs(inst)]);
+                       kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, get_rs(inst)));
                        break;
                case OP_31_XOP_MFSRIN:
                {
                        int srnum;
 
-                       srnum = (vcpu->arch.gpr[get_rb(inst)] >> 28) & 0xf;
+                       srnum = (kvmppc_get_gpr(vcpu, get_rb(inst)) >> 28) & 0xf;
                        if (vcpu->arch.mmu.mfsrin) {
                                u32 sr;
                                sr = vcpu->arch.mmu.mfsrin(vcpu, srnum);
-                               vcpu->arch.gpr[get_rt(inst)] = sr;
+                               kvmppc_set_gpr(vcpu, get_rt(inst), sr);
                        }
                        break;
                }
                case OP_31_XOP_MTSRIN:
                        vcpu->arch.mmu.mtsrin(vcpu,
-                               (vcpu->arch.gpr[get_rb(inst)] >> 28) & 0xf,
-                               vcpu->arch.gpr[get_rs(inst)]);
+                               (kvmppc_get_gpr(vcpu, get_rb(inst)) >> 28) & 0xf,
+                               kvmppc_get_gpr(vcpu, get_rs(inst)));
                        break;
                case OP_31_XOP_TLBIE:
                case OP_31_XOP_TLBIEL:
                {
                        bool large = (inst & 0x00200000) ? true : false;
-                       ulong addr = vcpu->arch.gpr[get_rb(inst)];
+                       ulong addr = kvmppc_get_gpr(vcpu, get_rb(inst));
                        vcpu->arch.mmu.tlbie(vcpu, addr, large);
                        break;
                }
@@ -111,14 +111,16 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
                        if (!vcpu->arch.mmu.slbmte)
                                return EMULATE_FAIL;
 
-                       vcpu->arch.mmu.slbmte(vcpu, vcpu->arch.gpr[get_rs(inst)],
-                                               vcpu->arch.gpr[get_rb(inst)]);
+                       vcpu->arch.mmu.slbmte(vcpu,
+                                       kvmppc_get_gpr(vcpu, get_rs(inst)),
+                                       kvmppc_get_gpr(vcpu, get_rb(inst)));
                        break;
                case OP_31_XOP_SLBIE:
                        if (!vcpu->arch.mmu.slbie)
                                return EMULATE_FAIL;
 
-                       vcpu->arch.mmu.slbie(vcpu, vcpu->arch.gpr[get_rb(inst)]);
+                       vcpu->arch.mmu.slbie(vcpu,
+                                       kvmppc_get_gpr(vcpu, get_rb(inst)));
                        break;
                case OP_31_XOP_SLBIA:
                        if (!vcpu->arch.mmu.slbia)
@@ -132,9 +134,9 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
                        } else {
                                ulong t, rb;
 
-                               rb = vcpu->arch.gpr[get_rb(inst)];
+                               rb = kvmppc_get_gpr(vcpu, get_rb(inst));
                                t = vcpu->arch.mmu.slbmfee(vcpu, rb);
-                               vcpu->arch.gpr[get_rt(inst)] = t;
+                               kvmppc_set_gpr(vcpu, get_rt(inst), t);
                        }
                        break;
                case OP_31_XOP_SLBMFEV:
@@ -143,20 +145,20 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
                        } else {
                                ulong t, rb;
 
-                               rb = vcpu->arch.gpr[get_rb(inst)];
+                               rb = kvmppc_get_gpr(vcpu, get_rb(inst));
                                t = vcpu->arch.mmu.slbmfev(vcpu, rb);
-                               vcpu->arch.gpr[get_rt(inst)] = t;
+                               kvmppc_set_gpr(vcpu, get_rt(inst), t);
                        }
                        break;
                case OP_31_XOP_DCBZ:
                {
-                       ulong rb =  vcpu->arch.gpr[get_rb(inst)];
+                       ulong rb = kvmppc_get_gpr(vcpu, get_rb(inst));
                        ulong ra = 0;
                        ulong addr;
                        u32 zeros[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
 
                        if (get_ra(inst))
-                               ra = vcpu->arch.gpr[get_ra(inst)];
+                               ra = kvmppc_get_gpr(vcpu, get_ra(inst));
 
                        addr = (ra + rb) & ~31ULL;
                        if (!(vcpu->arch.msr & MSR_SF))
@@ -233,43 +235,44 @@ static void kvmppc_write_bat(struct kvm_vcpu *vcpu, int sprn, u32 val)
 int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
 {
        int emulated = EMULATE_DONE;
+       ulong spr_val = kvmppc_get_gpr(vcpu, rs);
 
        switch (sprn) {
        case SPRN_SDR1:
-               to_book3s(vcpu)->sdr1 = vcpu->arch.gpr[rs];
+               to_book3s(vcpu)->sdr1 = spr_val;
                break;
        case SPRN_DSISR:
-               to_book3s(vcpu)->dsisr = vcpu->arch.gpr[rs];
+               to_book3s(vcpu)->dsisr = spr_val;
                break;
        case SPRN_DAR:
-               vcpu->arch.dear = vcpu->arch.gpr[rs];
+               vcpu->arch.dear = spr_val;
                break;
        case SPRN_HIOR:
-               to_book3s(vcpu)->hior = vcpu->arch.gpr[rs];
+               to_book3s(vcpu)->hior = spr_val;
                break;
        case SPRN_IBAT0U ... SPRN_IBAT3L:
        case SPRN_IBAT4U ... SPRN_IBAT7L:
        case SPRN_DBAT0U ... SPRN_DBAT3L:
        case SPRN_DBAT4U ... SPRN_DBAT7L:
-               kvmppc_write_bat(vcpu, sprn, (u32)vcpu->arch.gpr[rs]);
+               kvmppc_write_bat(vcpu, sprn, (u32)spr_val);
                /* BAT writes happen so rarely that we're ok to flush
                 * everything here */
                kvmppc_mmu_pte_flush(vcpu, 0, 0);
                break;
        case SPRN_HID0:
-               to_book3s(vcpu)->hid[0] = vcpu->arch.gpr[rs];
+               to_book3s(vcpu)->hid[0] = spr_val;
                break;
        case SPRN_HID1:
-               to_book3s(vcpu)->hid[1] = vcpu->arch.gpr[rs];
+               to_book3s(vcpu)->hid[1] = spr_val;
                break;
        case SPRN_HID2:
-               to_book3s(vcpu)->hid[2] = vcpu->arch.gpr[rs];
+               to_book3s(vcpu)->hid[2] = spr_val;
                break;
        case SPRN_HID4:
-               to_book3s(vcpu)->hid[4] = vcpu->arch.gpr[rs];
+               to_book3s(vcpu)->hid[4] = spr_val;
                break;
        case SPRN_HID5:
-               to_book3s(vcpu)->hid[5] = vcpu->arch.gpr[rs];
+               to_book3s(vcpu)->hid[5] = spr_val;
                /* guest HID5 set can change is_dcbz32 */
                if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
                    (mfmsr() & MSR_HV))
@@ -299,38 +302,38 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
 
        switch (sprn) {
        case SPRN_SDR1:
-               vcpu->arch.gpr[rt] = to_book3s(vcpu)->sdr1;
+               kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->sdr1);
                break;
        case SPRN_DSISR:
-               vcpu->arch.gpr[rt] = to_book3s(vcpu)->dsisr;
+               kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->dsisr);
                break;
        case SPRN_DAR:
-               vcpu->arch.gpr[rt] = vcpu->arch.dear;
+               kvmppc_set_gpr(vcpu, rt, vcpu->arch.dear);
                break;
        case SPRN_HIOR:
-               vcpu->arch.gpr[rt] = to_book3s(vcpu)->hior;
+               kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hior);
                break;
        case SPRN_HID0:
-               vcpu->arch.gpr[rt] = to_book3s(vcpu)->hid[0];
+               kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[0]);
                break;
        case SPRN_HID1:
-               vcpu->arch.gpr[rt] = to_book3s(vcpu)->hid[1];
+               kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[1]);
                break;
        case SPRN_HID2:
-               vcpu->arch.gpr[rt] = to_book3s(vcpu)->hid[2];
+               kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[2]);
                break;
        case SPRN_HID4:
-               vcpu->arch.gpr[rt] = to_book3s(vcpu)->hid[4];
+               kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[4]);
                break;
        case SPRN_HID5:
-               vcpu->arch.gpr[rt] = to_book3s(vcpu)->hid[5];
+               kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[5]);
                break;
        case SPRN_THRM1:
        case SPRN_THRM2:
        case SPRN_THRM3:
        case SPRN_CTRLF:
        case SPRN_CTRLT:
-               vcpu->arch.gpr[rt] = 0;
+               kvmppc_set_gpr(vcpu, rt, 0);
                break;
        default:
                printk(KERN_INFO "KVM: invalid SPR read: %d\n", sprn);
index 5b2db38ed86cce3c1d69309308f0a49c63806ee4..1dd5a1ddfd0dfba437043c1aa4cfb3f3ed4aca62 100644 (file)
 
 EXPORT_SYMBOL_GPL(kvmppc_trampoline_enter);
 EXPORT_SYMBOL_GPL(kvmppc_trampoline_lowmem);
+EXPORT_SYMBOL_GPL(kvmppc_rmcall);
+EXPORT_SYMBOL_GPL(kvmppc_load_up_fpu);
+#ifdef CONFIG_ALTIVEC
+EXPORT_SYMBOL_GPL(kvmppc_load_up_altivec);
+#endif
+#ifdef CONFIG_VSX
+EXPORT_SYMBOL_GPL(kvmppc_load_up_vsx);
+#endif
index 7b55d8094c8b372781bea6a2c35c689b0f273cc8..c1584d0cbce82d3ca9f3dd0565d6d5adacbfc3c7 100644 (file)
 #define ULONG_SIZE 8
 #define VCPU_GPR(n)     (VCPU_GPRS + (n * ULONG_SIZE))
 
-.macro mfpaca tmp_reg, src_reg, offset, vcpu_reg
-       ld      \tmp_reg, (PACA_EXMC+\offset)(r13)
-       std     \tmp_reg, VCPU_GPR(\src_reg)(\vcpu_reg)
-.endm
-
 .macro DISABLE_INTERRUPTS
        mfmsr   r0
        rldicl  r0,r0,48,1
        mtmsrd  r0,1
 .endm
 
+#define VCPU_LOAD_NVGPRS(vcpu) \
+       ld      r14, VCPU_GPR(r14)(vcpu); \
+       ld      r15, VCPU_GPR(r15)(vcpu); \
+       ld      r16, VCPU_GPR(r16)(vcpu); \
+       ld      r17, VCPU_GPR(r17)(vcpu); \
+       ld      r18, VCPU_GPR(r18)(vcpu); \
+       ld      r19, VCPU_GPR(r19)(vcpu); \
+       ld      r20, VCPU_GPR(r20)(vcpu); \
+       ld      r21, VCPU_GPR(r21)(vcpu); \
+       ld      r22, VCPU_GPR(r22)(vcpu); \
+       ld      r23, VCPU_GPR(r23)(vcpu); \
+       ld      r24, VCPU_GPR(r24)(vcpu); \
+       ld      r25, VCPU_GPR(r25)(vcpu); \
+       ld      r26, VCPU_GPR(r26)(vcpu); \
+       ld      r27, VCPU_GPR(r27)(vcpu); \
+       ld      r28, VCPU_GPR(r28)(vcpu); \
+       ld      r29, VCPU_GPR(r29)(vcpu); \
+       ld      r30, VCPU_GPR(r30)(vcpu); \
+       ld      r31, VCPU_GPR(r31)(vcpu); \
+
 /*****************************************************************************
  *                                                                           *
  *     Guest entry / exit code that is in kernel module memory (highmem)     *
@@ -67,61 +82,32 @@ kvm_start_entry:
        SAVE_NVGPRS(r1)
 
        /* Save LR */
-       mflr    r14
-       std     r14, _LINK(r1)
-
-/* XXX optimize non-volatile loading away */
-kvm_start_lightweight:
+       std     r0, _LINK(r1)
 
-       DISABLE_INTERRUPTS
+       /* Load non-volatile guest state from the vcpu */
+       VCPU_LOAD_NVGPRS(r4)
 
        /* Save R1/R2 in the PACA */
-       std     r1, PACAR1(r13)
-       std     r2, (PACA_EXMC+EX_SRR0)(r13)
+       std     r1, PACA_KVM_HOST_R1(r13)
+       std     r2, PACA_KVM_HOST_R2(r13)
+
+       /* XXX swap in/out on load? */
        ld      r3, VCPU_HIGHMEM_HANDLER(r4)
-       std     r3, PACASAVEDMSR(r13)
+       std     r3, PACA_KVM_VMHANDLER(r13)
 
-       /* Load non-volatile guest state from the vcpu */
-       ld      r14, VCPU_GPR(r14)(r4)
-       ld      r15, VCPU_GPR(r15)(r4)
-       ld      r16, VCPU_GPR(r16)(r4)
-       ld      r17, VCPU_GPR(r17)(r4)
-       ld      r18, VCPU_GPR(r18)(r4)
-       ld      r19, VCPU_GPR(r19)(r4)
-       ld      r20, VCPU_GPR(r20)(r4)
-       ld      r21, VCPU_GPR(r21)(r4)
-       ld      r22, VCPU_GPR(r22)(r4)
-       ld      r23, VCPU_GPR(r23)(r4)
-       ld      r24, VCPU_GPR(r24)(r4)
-       ld      r25, VCPU_GPR(r25)(r4)
-       ld      r26, VCPU_GPR(r26)(r4)
-       ld      r27, VCPU_GPR(r27)(r4)
-       ld      r28, VCPU_GPR(r28)(r4)
-       ld      r29, VCPU_GPR(r29)(r4)
-       ld      r30, VCPU_GPR(r30)(r4)
-       ld      r31, VCPU_GPR(r31)(r4)
+kvm_start_lightweight:
 
        ld      r9, VCPU_PC(r4)                 /* r9 = vcpu->arch.pc */
        ld      r10, VCPU_SHADOW_MSR(r4)        /* r10 = vcpu->arch.shadow_msr */
 
-       ld      r3, VCPU_TRAMPOLINE_ENTER(r4)
-       mtsrr0  r3
-
-       LOAD_REG_IMMEDIATE(r3, MSR_KERNEL & ~(MSR_IR | MSR_DR))
-       mtsrr1  r3
-
-       /* Load guest state in the respective registers */
-       lwz     r3, VCPU_CR(r4)         /* r3 = vcpu->arch.cr */
-       stw     r3, (PACA_EXMC + EX_CCR)(r13)
-
-       ld      r3, VCPU_CTR(r4)        /* r3 = vcpu->arch.ctr */
-       mtctr   r3                      /* CTR = r3 */
+       /* Load some guest state in the respective registers */
+       ld      r5, VCPU_CTR(r4)        /* r5 = vcpu->arch.ctr */
+                                       /* will be swapped in by rmcall */
 
        ld      r3, VCPU_LR(r4)         /* r3 = vcpu->arch.lr */
        mtlr    r3                      /* LR = r3 */
 
-       ld      r3, VCPU_XER(r4)        /* r3 = vcpu->arch.xer */
-       std     r3, (PACA_EXMC + EX_R3)(r13)
+       DISABLE_INTERRUPTS
 
        /* Some guests may need to have dcbz set to 32 byte length.
         *
@@ -141,36 +127,15 @@ kvm_start_lightweight:
        mtspr   SPRN_HID5,r3
 
 no_dcbz32_on:
-       /*      Load guest GPRs */
-
-       ld      r3, VCPU_GPR(r9)(r4)
-       std     r3, (PACA_EXMC + EX_R9)(r13)
-       ld      r3, VCPU_GPR(r10)(r4)
-       std     r3, (PACA_EXMC + EX_R10)(r13)
-       ld      r3, VCPU_GPR(r11)(r4)
-       std     r3, (PACA_EXMC + EX_R11)(r13)
-       ld      r3, VCPU_GPR(r12)(r4)
-       std     r3, (PACA_EXMC + EX_R12)(r13)
-       ld      r3, VCPU_GPR(r13)(r4)
-       std     r3, (PACA_EXMC + EX_R13)(r13)
-
-       ld      r0, VCPU_GPR(r0)(r4)
-       ld      r1, VCPU_GPR(r1)(r4)
-       ld      r2, VCPU_GPR(r2)(r4)
-       ld      r3, VCPU_GPR(r3)(r4)
-       ld      r5, VCPU_GPR(r5)(r4)
-       ld      r6, VCPU_GPR(r6)(r4)
-       ld      r7, VCPU_GPR(r7)(r4)
-       ld      r8, VCPU_GPR(r8)(r4)
-       ld      r4, VCPU_GPR(r4)(r4)
-
-       /* This sets the Magic value for the trampoline */
-
-       li      r11, 1
-       stb     r11, PACA_KVM_IN_GUEST(r13)
+
+       ld      r6, VCPU_RMCALL(r4)
+       mtctr   r6
+
+       ld      r3, VCPU_TRAMPOLINE_ENTER(r4)
+       LOAD_REG_IMMEDIATE(r4, MSR_KERNEL & ~(MSR_IR | MSR_DR))
 
        /* Jump to SLB patching handler and into our guest */
-       RFI
+       bctr
 
 /*
  * This is the handler in module memory. It gets jumped at from the
@@ -184,125 +149,70 @@ kvmppc_handler_highmem:
        /*
         * Register usage at this point:
         *
-        * R00   = guest R13
-        * R01   = host R1
-        * R02   = host R2
-        * R10   = guest PC
-        * R11   = guest MSR
-        * R12   = exit handler id
-        * R13   = PACA
-        * PACA.exmc.R9    = guest R1
-        * PACA.exmc.R10   = guest R10
-        * PACA.exmc.R11   = guest R11
-        * PACA.exmc.R12   = guest R12
-        * PACA.exmc.R13   = guest R2
-        * PACA.exmc.DAR   = guest DAR
-        * PACA.exmc.DSISR = guest DSISR
-        * PACA.exmc.LR    = guest instruction
-        * PACA.exmc.CCR   = guest CR
-        * PACA.exmc.SRR0  = guest R0
+        * R0         = guest last inst
+        * R1         = host R1
+        * R2         = host R2
+        * R3         = guest PC
+        * R4         = guest MSR
+        * R5         = guest DAR
+        * R6         = guest DSISR
+        * R13        = PACA
+        * PACA.KVM.* = guest *
         *
         */
 
-       std     r3, (PACA_EXMC+EX_R3)(r13)
+       /* R7 = vcpu */
+       ld      r7, GPR4(r1)
 
-       /* save the exit id in R3 */
-       mr      r3, r12
+       /* Now save the guest state */
 
-       /* R12 = vcpu */
-       ld      r12, GPR4(r1)
+       stw     r0, VCPU_LAST_INST(r7)
 
-       /* Now save the guest state */
+       std     r3, VCPU_PC(r7)
+       std     r4, VCPU_SHADOW_SRR1(r7)
+       std     r5, VCPU_FAULT_DEAR(r7)
+       std     r6, VCPU_FAULT_DSISR(r7)
 
-       std     r0, VCPU_GPR(r13)(r12)
-       std     r4, VCPU_GPR(r4)(r12)
-       std     r5, VCPU_GPR(r5)(r12)
-       std     r6, VCPU_GPR(r6)(r12)
-       std     r7, VCPU_GPR(r7)(r12)
-       std     r8, VCPU_GPR(r8)(r12)
-       std     r9, VCPU_GPR(r9)(r12)
-
-       /* get registers from PACA */
-       mfpaca  r5, r0, EX_SRR0, r12
-       mfpaca  r5, r3, EX_R3, r12
-       mfpaca  r5, r1, EX_R9, r12
-       mfpaca  r5, r10, EX_R10, r12
-       mfpaca  r5, r11, EX_R11, r12
-       mfpaca  r5, r12, EX_R12, r12
-       mfpaca  r5, r2, EX_R13, r12
-
-       lwz     r5, (PACA_EXMC+EX_LR)(r13)
-       stw     r5, VCPU_LAST_INST(r12)
-
-       lwz     r5, (PACA_EXMC+EX_CCR)(r13)
-       stw     r5, VCPU_CR(r12)
-
-       ld      r5, VCPU_HFLAGS(r12)
+       ld      r5, VCPU_HFLAGS(r7)
        rldicl. r5, r5, 0, 63           /* CR = ((r5 & 1) == 0) */
        beq     no_dcbz32_off
 
+       li      r4, 0
        mfspr   r5,SPRN_HID5
-       rldimi  r5,r5,6,56
+       rldimi  r5,r4,6,56
        mtspr   SPRN_HID5,r5
 
 no_dcbz32_off:
 
-       /* XXX maybe skip on lightweight? */
-       std     r14, VCPU_GPR(r14)(r12)
-       std     r15, VCPU_GPR(r15)(r12)
-       std     r16, VCPU_GPR(r16)(r12)
-       std     r17, VCPU_GPR(r17)(r12)
-       std     r18, VCPU_GPR(r18)(r12)
-       std     r19, VCPU_GPR(r19)(r12)
-       std     r20, VCPU_GPR(r20)(r12)
-       std     r21, VCPU_GPR(r21)(r12)
-       std     r22, VCPU_GPR(r22)(r12)
-       std     r23, VCPU_GPR(r23)(r12)
-       std     r24, VCPU_GPR(r24)(r12)
-       std     r25, VCPU_GPR(r25)(r12)
-       std     r26, VCPU_GPR(r26)(r12)
-       std     r27, VCPU_GPR(r27)(r12)
-       std     r28, VCPU_GPR(r28)(r12)
-       std     r29, VCPU_GPR(r29)(r12)
-       std     r30, VCPU_GPR(r30)(r12)
-       std     r31, VCPU_GPR(r31)(r12)
-
-       /* Restore non-volatile host registers (r14 - r31) */
-       REST_NVGPRS(r1)
-
-       /* Save guest PC (R10) */
-       std     r10, VCPU_PC(r12)
-
-       /* Save guest msr (R11) */
-       std     r11, VCPU_SHADOW_MSR(r12)
-
-       /* Save guest CTR (in R12) */
+       std     r14, VCPU_GPR(r14)(r7)
+       std     r15, VCPU_GPR(r15)(r7)
+       std     r16, VCPU_GPR(r16)(r7)
+       std     r17, VCPU_GPR(r17)(r7)
+       std     r18, VCPU_GPR(r18)(r7)
+       std     r19, VCPU_GPR(r19)(r7)
+       std     r20, VCPU_GPR(r20)(r7)
+       std     r21, VCPU_GPR(r21)(r7)
+       std     r22, VCPU_GPR(r22)(r7)
+       std     r23, VCPU_GPR(r23)(r7)
+       std     r24, VCPU_GPR(r24)(r7)
+       std     r25, VCPU_GPR(r25)(r7)
+       std     r26, VCPU_GPR(r26)(r7)
+       std     r27, VCPU_GPR(r27)(r7)
+       std     r28, VCPU_GPR(r28)(r7)
+       std     r29, VCPU_GPR(r29)(r7)
+       std     r30, VCPU_GPR(r30)(r7)
+       std     r31, VCPU_GPR(r31)(r7)
+
+       /* Save guest CTR */
        mfctr   r5
-       std     r5, VCPU_CTR(r12)
+       std     r5, VCPU_CTR(r7)
 
        /* Save guest LR */
        mflr    r5
-       std     r5, VCPU_LR(r12)
-
-       /* Save guest XER */
-       mfxer   r5
-       std     r5, VCPU_XER(r12)
-
-       /* Save guest DAR */
-       ld      r5, (PACA_EXMC+EX_DAR)(r13)
-       std     r5, VCPU_FAULT_DEAR(r12)
-
-       /* Save guest DSISR */
-       lwz     r5, (PACA_EXMC+EX_DSISR)(r13)
-       std     r5, VCPU_FAULT_DSISR(r12)
+       std     r5, VCPU_LR(r7)
 
        /* Restore host msr -> SRR1 */
-       ld      r7, VCPU_HOST_MSR(r12)
-       mtsrr1  r7
-
-       /* Restore host IP -> SRR0 */
-       ld      r6, VCPU_HOST_RETIP(r12)
-       mtsrr0  r6
+       ld      r6, VCPU_HOST_MSR(r7)
 
        /*
         * For some interrupts, we need to call the real Linux
@@ -314,13 +224,14 @@ no_dcbz32_off:
         * r3 = address of interrupt handler (exit reason)
         */
 
-       cmpwi   r3, BOOK3S_INTERRUPT_EXTERNAL
+       cmpwi   r12, BOOK3S_INTERRUPT_EXTERNAL
        beq     call_linux_handler
-       cmpwi   r3, BOOK3S_INTERRUPT_DECREMENTER
+       cmpwi   r12, BOOK3S_INTERRUPT_DECREMENTER
        beq     call_linux_handler
 
-       /* Back to Interruptable Mode! (goto kvm_return_point) */
-       RFI
+       /* Back to EE=1 */
+       mtmsr   r6
+       b       kvm_return_point
 
 call_linux_handler:
 
@@ -333,16 +244,22 @@ call_linux_handler:
         * interrupt handler!
         *
         * R3 still contains the exit code,
-        * R6 VCPU_HOST_RETIP and
-        * R7 VCPU_HOST_MSR
+        * R5 VCPU_HOST_RETIP and
+        * R6 VCPU_HOST_MSR
         */
 
-       mtlr    r3
+       /* Restore host IP -> SRR0 */
+       ld      r5, VCPU_HOST_RETIP(r7)
+
+       /* XXX Better move to a safe function?
+        *     What if we get an HTAB flush in between mtsrr0 and mtsrr1? */
 
-       ld      r5, VCPU_TRAMPOLINE_LOWMEM(r12)
-       mtsrr0  r5
-       LOAD_REG_IMMEDIATE(r5, MSR_KERNEL & ~(MSR_IR | MSR_DR))
-       mtsrr1  r5
+       mtlr    r12
+
+       ld      r4, VCPU_TRAMPOLINE_LOWMEM(r7)
+       mtsrr0  r4
+       LOAD_REG_IMMEDIATE(r3, MSR_KERNEL & ~(MSR_IR | MSR_DR))
+       mtsrr1  r3
 
        RFI
 
@@ -351,42 +268,51 @@ kvm_return_point:
 
        /* Jump back to lightweight entry if we're supposed to */
        /* go back into the guest */
-       mr      r5, r3
+
+       /* Pass the exit number as 3rd argument to kvmppc_handle_exit */
+       mr      r5, r12
+
        /* Restore r3 (kvm_run) and r4 (vcpu) */
        REST_2GPRS(3, r1)
        bl      KVMPPC_HANDLE_EXIT
 
-#if 0 /* XXX get lightweight exits back */
+       /* If RESUME_GUEST, get back in the loop */
        cmpwi   r3, RESUME_GUEST
-       bne     kvm_exit_heavyweight
+       beq     kvm_loop_lightweight
 
-       /* put VCPU and KVM_RUN back into place and roll again! */
-       REST_2GPRS(3, r1)
-       b       kvm_start_lightweight
+       cmpwi   r3, RESUME_GUEST_NV
+       beq     kvm_loop_heavyweight
 
-kvm_exit_heavyweight:
-       /* Restore non-volatile host registers */
-       ld      r14, _LINK(r1)
-       mtlr    r14
-       REST_NVGPRS(r1)
+kvm_exit_loop:
 
-       addi    r1, r1, SWITCH_FRAME_SIZE
-#else
        ld      r4, _LINK(r1)
        mtlr    r4
 
-       cmpwi   r3, RESUME_GUEST
-       bne     kvm_exit_heavyweight
+       /* Restore non-volatile host registers (r14 - r31) */
+       REST_NVGPRS(r1)
+
+       addi    r1, r1, SWITCH_FRAME_SIZE
+       blr
+
+kvm_loop_heavyweight:
+
+       ld      r4, _LINK(r1)
+       std     r4, (16 + SWITCH_FRAME_SIZE)(r1)
 
+       /* Load vcpu and cpu_run */
        REST_2GPRS(3, r1)
 
-       addi    r1, r1, SWITCH_FRAME_SIZE
+       /* Load non-volatile guest state from the vcpu */
+       VCPU_LOAD_NVGPRS(r4)
 
-       b       kvm_start_entry
+       /* Jump back into the beginning of this function */
+       b       kvm_start_lightweight
 
-kvm_exit_heavyweight:
+kvm_loop_lightweight:
 
-       addi    r1, r1, SWITCH_FRAME_SIZE
-#endif
+       /* We'll need the vcpu pointer */
+       REST_GPR(4, r1)
+
+       /* Jump back into the beginning of this function */
+       b       kvm_start_lightweight
 
-       blr
index e4beeb371a732fecdf3fc5fe1291ae01aa40b2a6..512dcff77554aa6d76bd3b432b2b1aa0cdb7cd93 100644 (file)
@@ -54,7 +54,7 @@ static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe(
                if (!vcpu_book3s->slb[i].valid)
                        continue;
 
-               if (vcpu_book3s->slb[i].large)
+               if (vcpu_book3s->slb[i].tb)
                        cmp_esid = esid_1t;
 
                if (vcpu_book3s->slb[i].esid == cmp_esid)
@@ -65,9 +65,10 @@ static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe(
                eaddr, esid, esid_1t);
        for (i = 0; i < vcpu_book3s->slb_nr; i++) {
            if (vcpu_book3s->slb[i].vsid)
-               dprintk("  %d: %c%c %llx %llx\n", i,
+               dprintk("  %d: %c%c%c %llx %llx\n", i,
                        vcpu_book3s->slb[i].valid ? 'v' : ' ',
                        vcpu_book3s->slb[i].large ? 'l' : ' ',
+                       vcpu_book3s->slb[i].tb    ? 't' : ' ',
                        vcpu_book3s->slb[i].esid,
                        vcpu_book3s->slb[i].vsid);
        }
@@ -84,7 +85,7 @@ static u64 kvmppc_mmu_book3s_64_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr,
        if (!slb)
                return 0;
 
-       if (slb->large)
+       if (slb->tb)
                return (((u64)eaddr >> 12) & 0xfffffff) |
                       (((u64)slb->vsid) << 28);
 
@@ -309,7 +310,8 @@ static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb)
        slbe = &vcpu_book3s->slb[slb_nr];
 
        slbe->large = (rs & SLB_VSID_L) ? 1 : 0;
-       slbe->esid  = slbe->large ? esid_1t : esid;
+       slbe->tb    = (rs & SLB_VSID_B_1T) ? 1 : 0;
+       slbe->esid  = slbe->tb ? esid_1t : esid;
        slbe->vsid  = rs >> 12;
        slbe->valid = (rb & SLB_ESID_V) ? 1 : 0;
        slbe->Ks    = (rs & SLB_VSID_KS) ? 1 : 0;
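
The SLB changes in this file stop treating "large page" (the L bit) as a
proxy for "1TB segment" (B = 1T): the two are independent, and it is the
segment size, not the page size, that decides how the ESID is formed.  A
small standalone sketch of that distinction; the shift values are the
conventional hashed-MMU ones and are an assumption here, not taken from this
patch:

        #define SID_SHIFT       28      /* 256MB segments */
        #define SID_SHIFT_1T    40      /* 1TB segments */

        static unsigned long esid_for(unsigned long eaddr, int one_tb_segment)
        {
                /* which effective segment does this address fall into? */
                return one_tb_segment ? (eaddr >> SID_SHIFT_1T)
                                      : (eaddr >> SID_SHIFT);
        }

Keying the choice off slbe->large meant a guest mixing segment and page sizes
could end up with the wrong ESID; tracking the B field separately in
slbe->tb removes that confusion.
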
index fb7dd2e9ac8863fdfc48be3088cd73dcb6337df4..c83c60ad96c58a791b01913e481a2316a81882a4 100644 (file)
@@ -45,36 +45,25 @@ kvmppc_trampoline_\intno:
         * To distinguish, we check a magic byte in the PACA
         */
        mfspr   r13, SPRN_SPRG_PACA             /* r13 = PACA */
-       std     r12, (PACA_EXMC + EX_R12)(r13)
+       std     r12, PACA_KVM_SCRATCH0(r13)
        mfcr    r12
-       stw     r12, (PACA_EXMC + EX_CCR)(r13)
+       stw     r12, PACA_KVM_SCRATCH1(r13)
        lbz     r12, PACA_KVM_IN_GUEST(r13)
-       cmpwi   r12, 0
+       cmpwi   r12, KVM_GUEST_MODE_NONE
        bne     ..kvmppc_handler_hasmagic_\intno
        /* No KVM guest? Then jump back to the Linux handler! */
-       lwz     r12, (PACA_EXMC + EX_CCR)(r13)
+       lwz     r12, PACA_KVM_SCRATCH1(r13)
        mtcr    r12
-       ld      r12, (PACA_EXMC + EX_R12)(r13)
+       ld      r12, PACA_KVM_SCRATCH0(r13)
        mfspr   r13, SPRN_SPRG_SCRATCH0         /* r13 = original r13 */
        b       kvmppc_resume_\intno            /* Get back original handler */
 
        /* Now we know we're handling a KVM guest */
 ..kvmppc_handler_hasmagic_\intno:
-       /* Unset guest state */
-       li      r12, 0
-       stb     r12, PACA_KVM_IN_GUEST(r13)
 
-       std     r1, (PACA_EXMC+EX_R9)(r13)
-       std     r10, (PACA_EXMC+EX_R10)(r13)
-       std     r11, (PACA_EXMC+EX_R11)(r13)
-       std     r2, (PACA_EXMC+EX_R13)(r13)
-
-       mfsrr0  r10
-       mfsrr1  r11
-
-       /* Restore R1/R2 so we can handle faults */
-       ld      r1, PACAR1(r13)
-       ld      r2, (PACA_EXMC+EX_SRR0)(r13)
+       /* Should we just skip the faulting instruction? */
+       cmpwi   r12, KVM_GUEST_MODE_SKIP
+       beq     kvmppc_handler_skip_ins
 
        /* Let's store which interrupt we're handling */
        li      r12, \intno
@@ -101,24 +90,108 @@ INTERRUPT_TRAMPOLINE      BOOK3S_INTERRUPT_PERFMON
 INTERRUPT_TRAMPOLINE   BOOK3S_INTERRUPT_ALTIVEC
 INTERRUPT_TRAMPOLINE   BOOK3S_INTERRUPT_VSX
 
+/*
+ * Bring us back to the faulting code, but skip the
+ * faulting instruction.
+ *
+ * This is a generic exit path from the interrupt
+ * trampolines above.
+ *
+ * Input Registers:
+ *
+ * R12               = free
+ * R13               = PACA
+ * PACA.KVM.SCRATCH0 = guest R12
+ * PACA.KVM.SCRATCH1 = guest CR
+ * SPRG_SCRATCH0     = guest R13
+ *
+ */
+kvmppc_handler_skip_ins:
+
+       /* Patch the IP to the next instruction */
+       mfsrr0  r12
+       addi    r12, r12, 4
+       mtsrr0  r12
+
+       /* Clean up all state */
+       lwz     r12, PACA_KVM_SCRATCH1(r13)
+       mtcr    r12
+       ld      r12, PACA_KVM_SCRATCH0(r13)
+       mfspr   r13, SPRN_SPRG_SCRATCH0
+
+       /* And get back into the code */
+       RFI
+
 /*
  * This trampoline brings us back to a real mode handler
  *
  * Input Registers:
  *
- * R6 = SRR0
- * R7 = SRR1
+ * R5 = SRR0
+ * R6 = SRR1
  * LR = real-mode IP
  *
  */
 .global kvmppc_handler_lowmem_trampoline
 kvmppc_handler_lowmem_trampoline:
 
-       mtsrr0  r6
-       mtsrr1  r7
+       mtsrr0  r5
+       mtsrr1  r6
        blr
 kvmppc_handler_lowmem_trampoline_end:
 
+/*
+ * Call a function in real mode
+ *
+ * Input Registers:
+ *
+ * R3 = function
+ * R4 = MSR
+ * R5 = CTR
+ *
+ */
+_GLOBAL(kvmppc_rmcall)
+       mtmsr   r4              /* Disable relocation, so mtsrr
+                                  doesn't get interrupted */
+       mtctr   r5
+       mtsrr0  r3
+       mtsrr1  r4
+       RFI
+
+/*
+ * Activate current's external feature (FPU/Altivec/VSX)
+ */
+#define define_load_up(what)                           \
+                                                       \
+_GLOBAL(kvmppc_load_up_ ## what);                      \
+       subi    r1, r1, INT_FRAME_SIZE;                 \
+       mflr    r3;                                     \
+       std     r3, _LINK(r1);                          \
+       mfmsr   r4;                                     \
+       std     r31, GPR3(r1);                          \
+       mr      r31, r4;                                \
+       li      r5, MSR_DR;                             \
+       oris    r5, r5, MSR_EE@h;                       \
+       andc    r4, r4, r5;                             \
+       mtmsr   r4;                                     \
+                                                       \
+       bl      .load_up_ ## what;                      \
+                                                       \
+       mtmsr   r31;                                    \
+       ld      r3, _LINK(r1);                          \
+       ld      r31, GPR3(r1);                          \
+       addi    r1, r1, INT_FRAME_SIZE;                 \
+       mtlr    r3;                                     \
+       blr
+
+define_load_up(fpu)
+#ifdef CONFIG_ALTIVEC
+define_load_up(altivec)
+#endif
+#ifdef CONFIG_VSX
+define_load_up(vsx)
+#endif
+
 .global kvmppc_trampoline_lowmem
 kvmppc_trampoline_lowmem:
        .long kvmppc_handler_lowmem_trampoline - _stext
index ecd237a03fd0be4fb6ec8f853a520ca5f7e0016e..35b762722187ca884bd950bcbb894c4dfae09b32 100644 (file)
@@ -31,7 +31,7 @@
 #define REBOLT_SLB_ENTRY(num) \
        ld      r10, SHADOW_SLB_ESID(num)(r11); \
        cmpdi   r10, 0; \
-       beq     slb_exit_skip_1; \
+       beq     slb_exit_skip_ ## num; \
        oris    r10, r10, SLB_ESID_V@h; \
        ld      r9, SHADOW_SLB_VSID(num)(r11); \
        slbmte  r9, r10; \
@@ -51,23 +51,21 @@ kvmppc_handler_trampoline_enter:
         *
         * MSR = ~IR|DR
         * R13 = PACA
+        * R1 = host R1
+        * R2 = host R2
         * R9 = guest IP
         * R10 = guest MSR
-        * R11 = free
-        * R12 = free
-        * PACA[PACA_EXMC + EX_R9] = guest R9
-        * PACA[PACA_EXMC + EX_R10] = guest R10
-        * PACA[PACA_EXMC + EX_R11] = guest R11
-        * PACA[PACA_EXMC + EX_R12] = guest R12
-        * PACA[PACA_EXMC + EX_R13] = guest R13
-        * PACA[PACA_EXMC + EX_CCR] = guest CR
-        * PACA[PACA_EXMC + EX_R3] = guest XER
+        * all other GPRS = free
+        * PACA[KVM_CR] = guest CR
+        * PACA[KVM_XER] = guest XER
         */
 
        mtsrr0  r9
        mtsrr1  r10
 
-       mtspr   SPRN_SPRG_SCRATCH0, r0
+       /* Activate guest mode, so faults get handled by KVM */
+       li      r11, KVM_GUEST_MODE_GUEST
+       stb     r11, PACA_KVM_IN_GUEST(r13)
 
        /* Remove LPAR shadow entries */
 
@@ -131,20 +129,27 @@ slb_do_enter:
 
        /* Enter guest */
 
-       mfspr   r0, SPRN_SPRG_SCRATCH0
-
-       ld      r9, (PACA_EXMC+EX_R9)(r13)
-       ld      r10, (PACA_EXMC+EX_R10)(r13)
-       ld      r12, (PACA_EXMC+EX_R12)(r13)
-
-       lwz     r11, (PACA_EXMC+EX_CCR)(r13)
+       ld      r0, (PACA_KVM_R0)(r13)
+       ld      r1, (PACA_KVM_R1)(r13)
+       ld      r2, (PACA_KVM_R2)(r13)
+       ld      r3, (PACA_KVM_R3)(r13)
+       ld      r4, (PACA_KVM_R4)(r13)
+       ld      r5, (PACA_KVM_R5)(r13)
+       ld      r6, (PACA_KVM_R6)(r13)
+       ld      r7, (PACA_KVM_R7)(r13)
+       ld      r8, (PACA_KVM_R8)(r13)
+       ld      r9, (PACA_KVM_R9)(r13)
+       ld      r10, (PACA_KVM_R10)(r13)
+       ld      r12, (PACA_KVM_R12)(r13)
+
+       lwz     r11, (PACA_KVM_CR)(r13)
        mtcr    r11
 
-       ld      r11, (PACA_EXMC+EX_R3)(r13)
+       ld      r11, (PACA_KVM_XER)(r13)
        mtxer   r11
 
-       ld      r11, (PACA_EXMC+EX_R11)(r13)
-       ld      r13, (PACA_EXMC+EX_R13)(r13)
+       ld      r11, (PACA_KVM_R11)(r13)
+       ld      r13, (PACA_KVM_R13)(r13)
 
        RFI
 kvmppc_handler_trampoline_enter_end:
@@ -162,28 +167,54 @@ kvmppc_handler_trampoline_exit:
 
        /* Register usage at this point:
         *
-        * SPRG_SCRATCH0 = guest R13
-        * R01           = host R1
-        * R02           = host R2
-        * R10           = guest PC
-        * R11           = guest MSR
-        * R12           = exit handler id
-        * R13           = PACA
-        * PACA.exmc.CCR  = guest CR
-        * PACA.exmc.R9  = guest R1
-        * PACA.exmc.R10 = guest R10
-        * PACA.exmc.R11 = guest R11
-        * PACA.exmc.R12 = guest R12
-        * PACA.exmc.R13 = guest R2
+        * SPRG_SCRATCH0     = guest R13
+        * R12               = exit handler id
+        * R13               = PACA
+        * PACA.KVM.SCRATCH0 = guest R12
+        * PACA.KVM.SCRATCH1 = guest CR
         *
         */
 
        /* Save registers */
 
-       std     r0, (PACA_EXMC+EX_SRR0)(r13)
-       std     r9, (PACA_EXMC+EX_R3)(r13)
-       std     r10, (PACA_EXMC+EX_LR)(r13)
-       std     r11, (PACA_EXMC+EX_DAR)(r13)
+       std     r0, PACA_KVM_R0(r13)
+       std     r1, PACA_KVM_R1(r13)
+       std     r2, PACA_KVM_R2(r13)
+       std     r3, PACA_KVM_R3(r13)
+       std     r4, PACA_KVM_R4(r13)
+       std     r5, PACA_KVM_R5(r13)
+       std     r6, PACA_KVM_R6(r13)
+       std     r7, PACA_KVM_R7(r13)
+       std     r8, PACA_KVM_R8(r13)
+       std     r9, PACA_KVM_R9(r13)
+       std     r10, PACA_KVM_R10(r13)
+       std     r11, PACA_KVM_R11(r13)
+
+       /* Restore R1/R2 so we can handle faults */
+       ld      r1, PACA_KVM_HOST_R1(r13)
+       ld      r2, PACA_KVM_HOST_R2(r13)
+
+       /* Save guest PC and MSR in GPRs */
+       mfsrr0  r3
+       mfsrr1  r4
+
+       /* Get scratch'ed off registers */
+       mfspr   r9, SPRN_SPRG_SCRATCH0
+       std     r9, PACA_KVM_R13(r13)
+
+       ld      r8, PACA_KVM_SCRATCH0(r13)
+       std     r8, PACA_KVM_R12(r13)
+
+       lwz     r7, PACA_KVM_SCRATCH1(r13)
+       stw     r7, PACA_KVM_CR(r13)
+
+       /* Save more register state  */
+
+       mfxer   r6
+       stw     r6, PACA_KVM_XER(r13)
+
+       mfdar   r5
+       mfdsisr r6
 
        /*
         * In order for us to easily get the last instruction,
@@ -202,17 +233,28 @@ kvmppc_handler_trampoline_exit:
 
 ld_last_inst:
        /* Save off the guest instruction we're at */
+
+       /* Set guest mode to 'jump over instruction' so if lwz faults
+        * we'll just continue at the next IP. */
+       li      r9, KVM_GUEST_MODE_SKIP
+       stb     r9, PACA_KVM_IN_GUEST(r13)
+
        /*    1) enable paging for data */
        mfmsr   r9
        ori     r11, r9, MSR_DR                 /* Enable paging for data */
        mtmsr   r11
        /*    2) fetch the instruction */
-       lwz     r0, 0(r10)
+       li      r0, KVM_INST_FETCH_FAILED       /* In case lwz faults */
+       lwz     r0, 0(r3)
        /*    3) disable paging again */
        mtmsr   r9
 
 no_ld_last_inst:
 
+       /* Unset guest mode */
+       li      r9, KVM_GUEST_MODE_NONE
+       stb     r9, PACA_KVM_IN_GUEST(r13)
+
        /* Restore bolted entries from the shadow and fix it along the way */
 
        /* We don't store anything in entry 0, so we don't need to take care of it */
@@ -233,29 +275,27 @@ no_ld_last_inst:
 
 slb_do_exit:
 
-       /* Restore registers */
-
-       ld      r11, (PACA_EXMC+EX_DAR)(r13)
-       ld      r10, (PACA_EXMC+EX_LR)(r13)
-       ld      r9, (PACA_EXMC+EX_R3)(r13)
-
-       /* Save last inst */
-       stw     r0, (PACA_EXMC+EX_LR)(r13)
-
-       /* Save DAR and DSISR before going to paged mode */
-       mfdar   r0
-       std     r0, (PACA_EXMC+EX_DAR)(r13)
-       mfdsisr r0
-       stw     r0, (PACA_EXMC+EX_DSISR)(r13)
+       /* Register usage at this point:
+        *
+        * R0         = guest last inst
+        * R1         = host R1
+        * R2         = host R2
+        * R3         = guest PC
+        * R4         = guest MSR
+        * R5         = guest DAR
+        * R6         = guest DSISR
+        * R12        = exit handler id
+        * R13        = PACA
+        * PACA.KVM.* = guest *
+        *
+        */
 
        /* RFI into the highmem handler */
-       mfmsr   r0
-       ori     r0, r0, MSR_IR|MSR_DR|MSR_RI    /* Enable paging */
-       mtsrr1  r0
-       ld      r0, PACASAVEDMSR(r13)           /* Highmem handler address */
-       mtsrr0  r0
-
-       mfspr   r0, SPRN_SPRG_SCRATCH0
+       mfmsr   r7
+       ori     r7, r7, MSR_IR|MSR_DR|MSR_RI    /* Enable paging */
+       mtsrr1  r7
+       ld      r8, PACA_KVM_VMHANDLER(r13)     /* Highmem handler address */
+       mtsrr0  r8
 
        RFI
 kvmppc_handler_trampoline_exit_end:
index 06f5a9ecc42c9fc950666b44a177b7eadeb668ed..4d686cc6b260a797bbc7dffaa1467b574901714f 100644 (file)
@@ -69,10 +69,10 @@ void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu)
 
        for (i = 0; i < 32; i += 4) {
                printk("gpr%02d: %08lx %08lx %08lx %08lx\n", i,
-                      vcpu->arch.gpr[i],
-                      vcpu->arch.gpr[i+1],
-                      vcpu->arch.gpr[i+2],
-                      vcpu->arch.gpr[i+3]);
+                      kvmppc_get_gpr(vcpu, i),
+                      kvmppc_get_gpr(vcpu, i+1),
+                      kvmppc_get_gpr(vcpu, i+2),
+                      kvmppc_get_gpr(vcpu, i+3));
        }
 }
 
@@ -82,8 +82,32 @@ static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu,
        set_bit(priority, &vcpu->arch.pending_exceptions);
 }
 
-void kvmppc_core_queue_program(struct kvm_vcpu *vcpu)
+static void kvmppc_core_queue_dtlb_miss(struct kvm_vcpu *vcpu,
+                                        ulong dear_flags, ulong esr_flags)
 {
+       vcpu->arch.queued_dear = dear_flags;
+       vcpu->arch.queued_esr = esr_flags;
+       kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS);
+}
+
+static void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu,
+                                           ulong dear_flags, ulong esr_flags)
+{
+       vcpu->arch.queued_dear = dear_flags;
+       vcpu->arch.queued_esr = esr_flags;
+       kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE);
+}
+
+static void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu,
+                                           ulong esr_flags)
+{
+       vcpu->arch.queued_esr = esr_flags;
+       kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE);
+}
+
+void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong esr_flags)
+{
+       vcpu->arch.queued_esr = esr_flags;
        kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM);
 }
 
@@ -97,6 +121,11 @@ int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu)
        return test_bit(BOOKE_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions);
 }
 
+void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu)
+{
+       clear_bit(BOOKE_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions);
+}
+
 void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
                                 struct kvm_interrupt *irq)
 {
@@ -109,14 +138,19 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
 {
        int allowed = 0;
        ulong msr_mask;
+       bool update_esr = false, update_dear = false;
 
        switch (priority) {
-       case BOOKE_IRQPRIO_PROGRAM:
        case BOOKE_IRQPRIO_DTLB_MISS:
-       case BOOKE_IRQPRIO_ITLB_MISS:
-       case BOOKE_IRQPRIO_SYSCALL:
        case BOOKE_IRQPRIO_DATA_STORAGE:
+               update_dear = true;
+               /* fall through */
        case BOOKE_IRQPRIO_INST_STORAGE:
+       case BOOKE_IRQPRIO_PROGRAM:
+               update_esr = true;
+               /* fall through */
+       case BOOKE_IRQPRIO_ITLB_MISS:
+       case BOOKE_IRQPRIO_SYSCALL:
        case BOOKE_IRQPRIO_FP_UNAVAIL:
        case BOOKE_IRQPRIO_SPE_UNAVAIL:
        case BOOKE_IRQPRIO_SPE_FP_DATA:
@@ -151,6 +185,10 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
                vcpu->arch.srr0 = vcpu->arch.pc;
                vcpu->arch.srr1 = vcpu->arch.msr;
                vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority];
+               if (update_esr == true)
+                       vcpu->arch.esr = vcpu->arch.queued_esr;
+               if (update_dear == true)
+                       vcpu->arch.dear = vcpu->arch.queued_dear;
                kvmppc_set_msr(vcpu, vcpu->arch.msr & msr_mask);
 
                clear_bit(priority, &vcpu->arch.pending_exceptions);
@@ -223,8 +261,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
                if (vcpu->arch.msr & MSR_PR) {
                        /* Program traps generated by user-level software must be handled
                         * by the guest kernel. */
-                       vcpu->arch.esr = vcpu->arch.fault_esr;
-                       kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM);
+                       kvmppc_core_queue_program(vcpu, vcpu->arch.fault_esr);
                        r = RESUME_GUEST;
                        kvmppc_account_exit(vcpu, USR_PR_INST);
                        break;
@@ -280,16 +317,14 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
                break;
 
        case BOOKE_INTERRUPT_DATA_STORAGE:
-               vcpu->arch.dear = vcpu->arch.fault_dear;
-               vcpu->arch.esr = vcpu->arch.fault_esr;
-               kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE);
+               kvmppc_core_queue_data_storage(vcpu, vcpu->arch.fault_dear,
+                                              vcpu->arch.fault_esr);
                kvmppc_account_exit(vcpu, DSI_EXITS);
                r = RESUME_GUEST;
                break;
 
        case BOOKE_INTERRUPT_INST_STORAGE:
-               vcpu->arch.esr = vcpu->arch.fault_esr;
-               kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE);
+               kvmppc_core_queue_inst_storage(vcpu, vcpu->arch.fault_esr);
                kvmppc_account_exit(vcpu, ISI_EXITS);
                r = RESUME_GUEST;
                break;
@@ -310,9 +345,9 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
                gtlb_index = kvmppc_mmu_dtlb_index(vcpu, eaddr);
                if (gtlb_index < 0) {
                        /* The guest didn't have a mapping for it. */
-                       kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS);
-                       vcpu->arch.dear = vcpu->arch.fault_dear;
-                       vcpu->arch.esr = vcpu->arch.fault_esr;
+                       kvmppc_core_queue_dtlb_miss(vcpu,
+                                                   vcpu->arch.fault_dear,
+                                                   vcpu->arch.fault_esr);
                        kvmppc_mmu_dtlb_miss(vcpu);
                        kvmppc_account_exit(vcpu, DTLB_REAL_MISS_EXITS);
                        r = RESUME_GUEST;
@@ -426,7 +461,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
        vcpu->arch.pc = 0;
        vcpu->arch.msr = 0;
-       vcpu->arch.gpr[1] = (16<<20) - 8; /* -8 for the callee-save LR slot */
+       kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */
 
        vcpu->arch.shadow_pid = 1;
 
@@ -444,10 +479,10 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
        int i;
 
        regs->pc = vcpu->arch.pc;
-       regs->cr = vcpu->arch.cr;
+       regs->cr = kvmppc_get_cr(vcpu);
        regs->ctr = vcpu->arch.ctr;
        regs->lr = vcpu->arch.lr;
-       regs->xer = vcpu->arch.xer;
+       regs->xer = kvmppc_get_xer(vcpu);
        regs->msr = vcpu->arch.msr;
        regs->srr0 = vcpu->arch.srr0;
        regs->srr1 = vcpu->arch.srr1;
@@ -461,7 +496,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
        regs->sprg7 = vcpu->arch.sprg6;
 
        for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
-               regs->gpr[i] = vcpu->arch.gpr[i];
+               regs->gpr[i] = kvmppc_get_gpr(vcpu, i);
 
        return 0;
 }
@@ -471,10 +506,10 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
        int i;
 
        vcpu->arch.pc = regs->pc;
-       vcpu->arch.cr = regs->cr;
+       kvmppc_set_cr(vcpu, regs->cr);
        vcpu->arch.ctr = regs->ctr;
        vcpu->arch.lr = regs->lr;
-       vcpu->arch.xer = regs->xer;
+       kvmppc_set_xer(vcpu, regs->xer);
        kvmppc_set_msr(vcpu, regs->msr);
        vcpu->arch.srr0 = regs->srr0;
        vcpu->arch.srr1 = regs->srr1;
@@ -486,8 +521,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
        vcpu->arch.sprg6 = regs->sprg5;
        vcpu->arch.sprg7 = regs->sprg6;
 
-       for (i = 0; i < ARRAY_SIZE(vcpu->arch.gpr); i++)
-               vcpu->arch.gpr[i] = regs->gpr[i];
+       for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
+               kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
 
        return 0;
 }
index aebc65e93f4b41782d423fad656e0ba154277f8a..cbc790ee192892fcc19f7a7d856fb1a2db6da2b6 100644 (file)
@@ -62,20 +62,20 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
                case OP_31_XOP_MFMSR:
                        rt = get_rt(inst);
-                       vcpu->arch.gpr[rt] = vcpu->arch.msr;
+                       kvmppc_set_gpr(vcpu, rt, vcpu->arch.msr);
                        kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS);
                        break;
 
                case OP_31_XOP_MTMSR:
                        rs = get_rs(inst);
                        kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS);
-                       kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]);
+                       kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, rs));
                        break;
 
                case OP_31_XOP_WRTEE:
                        rs = get_rs(inst);
                        vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
-                                                        | (vcpu->arch.gpr[rs] & MSR_EE);
+                                       | (kvmppc_get_gpr(vcpu, rs) & MSR_EE);
                        kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
                        break;
 
@@ -101,22 +101,23 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
 int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
 {
        int emulated = EMULATE_DONE;
+       ulong spr_val = kvmppc_get_gpr(vcpu, rs);
 
        switch (sprn) {
        case SPRN_DEAR:
-               vcpu->arch.dear = vcpu->arch.gpr[rs]; break;
+               vcpu->arch.dear = spr_val; break;
        case SPRN_ESR:
-               vcpu->arch.esr = vcpu->arch.gpr[rs]; break;
+               vcpu->arch.esr = spr_val; break;
        case SPRN_DBCR0:
-               vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break;
+               vcpu->arch.dbcr0 = spr_val; break;
        case SPRN_DBCR1:
-               vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break;
+               vcpu->arch.dbcr1 = spr_val; break;
        case SPRN_DBSR:
-               vcpu->arch.dbsr &= ~vcpu->arch.gpr[rs]; break;
+               vcpu->arch.dbsr &= ~spr_val; break;
        case SPRN_TSR:
-               vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break;
+               vcpu->arch.tsr &= ~spr_val; break;
        case SPRN_TCR:
-               vcpu->arch.tcr = vcpu->arch.gpr[rs];
+               vcpu->arch.tcr = spr_val;
                kvmppc_emulate_dec(vcpu);
                break;
 
@@ -124,64 +125,64 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
         * loaded into the real SPRGs when resuming the
         * guest. */
        case SPRN_SPRG4:
-               vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break;
+               vcpu->arch.sprg4 = spr_val; break;
        case SPRN_SPRG5:
-               vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break;
+               vcpu->arch.sprg5 = spr_val; break;
        case SPRN_SPRG6:
-               vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break;
+               vcpu->arch.sprg6 = spr_val; break;
        case SPRN_SPRG7:
-               vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break;
+               vcpu->arch.sprg7 = spr_val; break;
 
        case SPRN_IVPR:
-               vcpu->arch.ivpr = vcpu->arch.gpr[rs];
+               vcpu->arch.ivpr = spr_val;
                break;
        case SPRN_IVOR0:
-               vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = vcpu->arch.gpr[rs];
+               vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = spr_val;
                break;
        case SPRN_IVOR1:
-               vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = vcpu->arch.gpr[rs];
+               vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = spr_val;
                break;
        case SPRN_IVOR2:
-               vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = vcpu->arch.gpr[rs];
+               vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = spr_val;
                break;
        case SPRN_IVOR3:
-               vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = vcpu->arch.gpr[rs];
+               vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = spr_val;
                break;
        case SPRN_IVOR4:
-               vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = vcpu->arch.gpr[rs];
+               vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = spr_val;
                break;
        case SPRN_IVOR5:
-               vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = vcpu->arch.gpr[rs];
+               vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = spr_val;
                break;
        case SPRN_IVOR6:
-               vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = vcpu->arch.gpr[rs];
+               vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = spr_val;
                break;
        case SPRN_IVOR7:
-               vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = vcpu->arch.gpr[rs];
+               vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = spr_val;
                break;
        case SPRN_IVOR8:
-               vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = vcpu->arch.gpr[rs];
+               vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = spr_val;
                break;
        case SPRN_IVOR9:
-               vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = vcpu->arch.gpr[rs];
+               vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = spr_val;
                break;
        case SPRN_IVOR10:
-               vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = vcpu->arch.gpr[rs];
+               vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = spr_val;
                break;
        case SPRN_IVOR11:
-               vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = vcpu->arch.gpr[rs];
+               vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = spr_val;
                break;
        case SPRN_IVOR12:
-               vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = vcpu->arch.gpr[rs];
+               vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = spr_val;
                break;
        case SPRN_IVOR13:
-               vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = vcpu->arch.gpr[rs];
+               vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = spr_val;
                break;
        case SPRN_IVOR14:
-               vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = vcpu->arch.gpr[rs];
+               vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = spr_val;
                break;
        case SPRN_IVOR15:
-               vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = vcpu->arch.gpr[rs];
+               vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = spr_val;
                break;
 
        default:
@@ -197,65 +198,65 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
 
        switch (sprn) {
        case SPRN_IVPR:
-               vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break;
+               kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivpr); break;
        case SPRN_DEAR:
-               vcpu->arch.gpr[rt] = vcpu->arch.dear; break;
+               kvmppc_set_gpr(vcpu, rt, vcpu->arch.dear); break;
        case SPRN_ESR:
-               vcpu->arch.gpr[rt] = vcpu->arch.esr; break;
+               kvmppc_set_gpr(vcpu, rt, vcpu->arch.esr); break;
        case SPRN_DBCR0:
-               vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break;
+               kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr0); break;
        case SPRN_DBCR1:
-               vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break;
+               kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr1); break;
        case SPRN_DBSR:
-               vcpu->arch.gpr[rt] = vcpu->arch.dbsr; break;
+               kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbsr); break;
 
        case SPRN_IVOR0:
-               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL];
+               kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]);
                break;
        case SPRN_IVOR1:
-               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK];
+               kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK]);
                break;
        case SPRN_IVOR2:
-               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE];
+               kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]);
                break;
        case SPRN_IVOR3:
-               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE];
+               kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE]);
                break;
        case SPRN_IVOR4:
-               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL];
+               kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL]);
                break;
        case SPRN_IVOR5:
-               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT];
+               kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT]);
                break;
        case SPRN_IVOR6:
-               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM];
+               kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM]);
                break;
        case SPRN_IVOR7:
-               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL];
+               kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL]);
                break;
        case SPRN_IVOR8:
-               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL];
+               kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]);
                break;
        case SPRN_IVOR9:
-               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL];
+               kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL]);
                break;
        case SPRN_IVOR10:
-               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER];
+               kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER]);
                break;
        case SPRN_IVOR11:
-               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT];
+               kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_FIT]);
                break;
        case SPRN_IVOR12:
-               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG];
+               kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG]);
                break;
        case SPRN_IVOR13:
-               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS];
+               kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS]);
                break;
        case SPRN_IVOR14:
-               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS];
+               kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS]);
                break;
        case SPRN_IVOR15:
-               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG];
+               kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]);
                break;
 
        default:
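
The conversion above (and in the e500 hunks that follow) replaces every direct vcpu->arch.gpr[] access with kvmppc_get_gpr()/kvmppc_set_gpr() calls. The accessors themselves are not part of these hunks; a minimal sketch of what such asm/kvm_ppc.h-style wrappers look like, assuming gpr[] stays the backing store on Book E (the indirection is what allows other flavours to keep their register file somewhere else):

/* Sketch only: thin inline accessors the converted code relies on. */
static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
{
	/* Book E keeps the architected GPR file in vcpu->arch.gpr[] */
	vcpu->arch.gpr[num] = val;
}

static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
{
	return vcpu->arch.gpr[num];
}
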
index 64949eef43f12994f9d75ee7eb86eb11d03b2d5b..efa1198940ab6b550a9f2e9ccf99d88692e43e61 100644 (file)
@@ -60,6 +60,12 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
 
        kvmppc_e500_tlb_setup(vcpu_e500);
 
+       /* Registers init */
+       vcpu->arch.pvr = mfspr(SPRN_PVR);
+
+       /* Since booke kvm only supports one core, update all vcpus' PIR to 0 */
+       vcpu->vcpu_id = 0;
+
        return 0;
 }
 
index be95b8d8e3b78506491f9075cc9e1a0146faeff3..8e3edfbc963412e39813dbb7ccff1b8a57bdcfd0 100644 (file)
@@ -74,54 +74,59 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
 {
        struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
        int emulated = EMULATE_DONE;
+       ulong spr_val = kvmppc_get_gpr(vcpu, rs);
 
        switch (sprn) {
        case SPRN_PID:
                vcpu_e500->pid[0] = vcpu->arch.shadow_pid =
-                       vcpu->arch.pid = vcpu->arch.gpr[rs];
+                       vcpu->arch.pid = spr_val;
                break;
        case SPRN_PID1:
-               vcpu_e500->pid[1] = vcpu->arch.gpr[rs]; break;
+               vcpu_e500->pid[1] = spr_val; break;
        case SPRN_PID2:
-               vcpu_e500->pid[2] = vcpu->arch.gpr[rs]; break;
+               vcpu_e500->pid[2] = spr_val; break;
        case SPRN_MAS0:
-               vcpu_e500->mas0 = vcpu->arch.gpr[rs]; break;
+               vcpu_e500->mas0 = spr_val; break;
        case SPRN_MAS1:
-               vcpu_e500->mas1 = vcpu->arch.gpr[rs]; break;
+               vcpu_e500->mas1 = spr_val; break;
        case SPRN_MAS2:
-               vcpu_e500->mas2 = vcpu->arch.gpr[rs]; break;
+               vcpu_e500->mas2 = spr_val; break;
        case SPRN_MAS3:
-               vcpu_e500->mas3 = vcpu->arch.gpr[rs]; break;
+               vcpu_e500->mas3 = spr_val; break;
        case SPRN_MAS4:
-               vcpu_e500->mas4 = vcpu->arch.gpr[rs]; break;
+               vcpu_e500->mas4 = spr_val; break;
        case SPRN_MAS6:
-               vcpu_e500->mas6 = vcpu->arch.gpr[rs]; break;
+               vcpu_e500->mas6 = spr_val; break;
        case SPRN_MAS7:
-               vcpu_e500->mas7 = vcpu->arch.gpr[rs]; break;
+               vcpu_e500->mas7 = spr_val; break;
+       case SPRN_L1CSR0:
+               vcpu_e500->l1csr0 = spr_val;
+               vcpu_e500->l1csr0 &= ~(L1CSR0_DCFI | L1CSR0_CLFC);
+               break;
        case SPRN_L1CSR1:
-               vcpu_e500->l1csr1 = vcpu->arch.gpr[rs]; break;
+               vcpu_e500->l1csr1 = spr_val; break;
        case SPRN_HID0:
-               vcpu_e500->hid0 = vcpu->arch.gpr[rs]; break;
+               vcpu_e500->hid0 = spr_val; break;
        case SPRN_HID1:
-               vcpu_e500->hid1 = vcpu->arch.gpr[rs]; break;
+               vcpu_e500->hid1 = spr_val; break;
 
        case SPRN_MMUCSR0:
                emulated = kvmppc_e500_emul_mt_mmucsr0(vcpu_e500,
-                               vcpu->arch.gpr[rs]);
+                               spr_val);
                break;
 
        /* extra exceptions */
        case SPRN_IVOR32:
-               vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = vcpu->arch.gpr[rs];
+               vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = spr_val;
                break;
        case SPRN_IVOR33:
-               vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA] = vcpu->arch.gpr[rs];
+               vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA] = spr_val;
                break;
        case SPRN_IVOR34:
-               vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] = vcpu->arch.gpr[rs];
+               vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] = spr_val;
                break;
        case SPRN_IVOR35:
-               vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = vcpu->arch.gpr[rs];
+               vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = spr_val;
                break;
 
        default:
@@ -138,63 +143,57 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
 
        switch (sprn) {
        case SPRN_PID:
-               vcpu->arch.gpr[rt] = vcpu_e500->pid[0]; break;
+               kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[0]); break;
        case SPRN_PID1:
-               vcpu->arch.gpr[rt] = vcpu_e500->pid[1]; break;
+               kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[1]); break;
        case SPRN_PID2:
-               vcpu->arch.gpr[rt] = vcpu_e500->pid[2]; break;
+               kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[2]); break;
        case SPRN_MAS0:
-               vcpu->arch.gpr[rt] = vcpu_e500->mas0; break;
+               kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas0); break;
        case SPRN_MAS1:
-               vcpu->arch.gpr[rt] = vcpu_e500->mas1; break;
+               kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas1); break;
        case SPRN_MAS2:
-               vcpu->arch.gpr[rt] = vcpu_e500->mas2; break;
+               kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas2); break;
        case SPRN_MAS3:
-               vcpu->arch.gpr[rt] = vcpu_e500->mas3; break;
+               kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas3); break;
        case SPRN_MAS4:
-               vcpu->arch.gpr[rt] = vcpu_e500->mas4; break;
+               kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas4); break;
        case SPRN_MAS6:
-               vcpu->arch.gpr[rt] = vcpu_e500->mas6; break;
+               kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas6); break;
        case SPRN_MAS7:
-               vcpu->arch.gpr[rt] = vcpu_e500->mas7; break;
+               kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas7); break;
 
        case SPRN_TLB0CFG:
-               vcpu->arch.gpr[rt] = mfspr(SPRN_TLB0CFG);
-               vcpu->arch.gpr[rt] &= ~0xfffUL;
-               vcpu->arch.gpr[rt] |= vcpu_e500->guest_tlb_size[0];
-               break;
-
+               kvmppc_set_gpr(vcpu, rt, vcpu_e500->tlb0cfg); break;
        case SPRN_TLB1CFG:
-               vcpu->arch.gpr[rt] = mfspr(SPRN_TLB1CFG);
-               vcpu->arch.gpr[rt] &= ~0xfffUL;
-               vcpu->arch.gpr[rt] |= vcpu_e500->guest_tlb_size[1];
-               break;
-
+               kvmppc_set_gpr(vcpu, rt, vcpu_e500->tlb1cfg); break;
+       case SPRN_L1CSR0:
+               kvmppc_set_gpr(vcpu, rt, vcpu_e500->l1csr0); break;
        case SPRN_L1CSR1:
-               vcpu->arch.gpr[rt] = vcpu_e500->l1csr1; break;
+               kvmppc_set_gpr(vcpu, rt, vcpu_e500->l1csr1); break;
        case SPRN_HID0:
-               vcpu->arch.gpr[rt] = vcpu_e500->hid0; break;
+               kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid0); break;
        case SPRN_HID1:
-               vcpu->arch.gpr[rt] = vcpu_e500->hid1; break;
+               kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid1); break;
 
        case SPRN_MMUCSR0:
-               vcpu->arch.gpr[rt] = 0; break;
+               kvmppc_set_gpr(vcpu, rt, 0); break;
 
        case SPRN_MMUCFG:
-               vcpu->arch.gpr[rt] = mfspr(SPRN_MMUCFG); break;
+               kvmppc_set_gpr(vcpu, rt, mfspr(SPRN_MMUCFG)); break;
 
        /* extra exceptions */
        case SPRN_IVOR32:
-               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL];
+               kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]);
                break;
        case SPRN_IVOR33:
-               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA];
+               kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA]);
                break;
        case SPRN_IVOR34:
-               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND];
+               kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND]);
                break;
        case SPRN_IVOR35:
-               vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR];
+               kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]);
                break;
        default:
                emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt);
index fb1e1dc11ba5e9fed05d4dfa82dc979f3389bb49..0d772e6b6318cf30f081b5ec76c9bfe035ca5250 100644 (file)
@@ -417,7 +417,7 @@ int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb)
        int esel, tlbsel;
        gva_t ea;
 
-       ea = ((ra) ? vcpu->arch.gpr[ra] : 0) + vcpu->arch.gpr[rb];
+       ea = ((ra) ? kvmppc_get_gpr(vcpu, ra) : 0) + kvmppc_get_gpr(vcpu, rb);
 
        ia = (ea >> 2) & 0x1;
 
@@ -470,7 +470,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
        struct tlbe *gtlbe = NULL;
        gva_t ea;
 
-       ea = vcpu->arch.gpr[rb];
+       ea = kvmppc_get_gpr(vcpu, rb);
 
        for (tlbsel = 0; tlbsel < 2; tlbsel++) {
                esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as);
@@ -728,6 +728,12 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
        if (vcpu_e500->shadow_pages[1] == NULL)
                goto err_out_page0;
 
+       /* Init TLB configuration register */
+       vcpu_e500->tlb0cfg = mfspr(SPRN_TLB0CFG) & ~0xfffUL;
+       vcpu_e500->tlb0cfg |= vcpu_e500->guest_tlb_size[0];
+       vcpu_e500->tlb1cfg = mfspr(SPRN_TLB1CFG) & ~0xfffUL;
+       vcpu_e500->tlb1cfg |= vcpu_e500->guest_tlb_size[1];
+
        return 0;
 
 err_out_page0:
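
With this init hunk, the mfspr emulation for SPRN_TLB0CFG/SPRN_TLB1CFG (earlier in the series) can simply hand back the cached vcpu_e500->tlb0cfg/tlb1cfg values: the host register is read once here and its entry-count field is replaced by the guest TLB size. A stand-alone illustration of that field surgery, assuming the NENTRY field occupies the low 12 bits as the ~0xfffUL mask implies:

#include <stdio.h>

/* Keep the host's upper TLBnCFG fields, advertise the guest-visible
 * number of entries in the low 12 bits (NENTRY). */
static unsigned long make_guest_tlbcfg(unsigned long host_cfg,
				       unsigned long guest_entries)
{
	return (host_cfg & ~0xfffUL) | (guest_entries & 0xfffUL);
}

int main(void)
{
	unsigned long host_cfg = 0x04110200;	/* made-up host TLB1CFG value */

	printf("guest TLB1CFG = %#lx\n", make_guest_tlbcfg(host_cfg, 16));
	return 0;
}
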
index 4a9ac6640fadb93182d9ade73310f1af1e0c4b93..cb72a65f4eccc01bcac5a431a090df8f36a9a374 100644 (file)
@@ -83,6 +83,9 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
 
        pr_debug("mtDEC: %x\n", vcpu->arch.dec);
 #ifdef CONFIG_PPC64
+       /* mtdec lowers the interrupt line when positive. */
+       kvmppc_core_dequeue_dec(vcpu);
+
        /* POWER4+ triggers a dec interrupt if the value is < 0 */
        if (vcpu->arch.dec & 0x80000000) {
                hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
@@ -140,14 +143,18 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 
        pr_debug(KERN_INFO "Emulating opcode %d / %d\n", get_op(inst), get_xop(inst));
 
+       /* Try again next time */
+       if (inst == KVM_INST_FETCH_FAILED)
+               return EMULATE_DONE;
+
        switch (get_op(inst)) {
        case OP_TRAP:
 #ifdef CONFIG_PPC64
        case OP_TRAP_64:
+               kvmppc_core_queue_program(vcpu, SRR1_PROGTRAP);
 #else
-               vcpu->arch.esr |= ESR_PTR;
+               kvmppc_core_queue_program(vcpu, vcpu->arch.esr | ESR_PTR);
 #endif
-               kvmppc_core_queue_program(vcpu);
                advance = 0;
                break;
 
@@ -167,14 +174,14 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
                case OP_31_XOP_STWX:
                        rs = get_rs(inst);
                        emulated = kvmppc_handle_store(run, vcpu,
-                                                      vcpu->arch.gpr[rs],
+                                                      kvmppc_get_gpr(vcpu, rs),
                                                       4, 1);
                        break;
 
                case OP_31_XOP_STBX:
                        rs = get_rs(inst);
                        emulated = kvmppc_handle_store(run, vcpu,
-                                                      vcpu->arch.gpr[rs],
+                                                      kvmppc_get_gpr(vcpu, rs),
                                                       1, 1);
                        break;
 
@@ -183,14 +190,14 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
                        ra = get_ra(inst);
                        rb = get_rb(inst);
 
-                       ea = vcpu->arch.gpr[rb];
+                       ea = kvmppc_get_gpr(vcpu, rb);
                        if (ra)
-                               ea += vcpu->arch.gpr[ra];
+                               ea += kvmppc_get_gpr(vcpu, ra);
 
                        emulated = kvmppc_handle_store(run, vcpu,
-                                                      vcpu->arch.gpr[rs],
+                                                      kvmppc_get_gpr(vcpu, rs),
                                                       1, 1);
-                       vcpu->arch.gpr[rs] = ea;
+                       kvmppc_set_gpr(vcpu, rs, ea);
                        break;
 
                case OP_31_XOP_LHZX:
@@ -203,12 +210,12 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
                        ra = get_ra(inst);
                        rb = get_rb(inst);
 
-                       ea = vcpu->arch.gpr[rb];
+                       ea = kvmppc_get_gpr(vcpu, rb);
                        if (ra)
-                               ea += vcpu->arch.gpr[ra];
+                               ea += kvmppc_get_gpr(vcpu, ra);
 
                        emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
-                       vcpu->arch.gpr[ra] = ea;
+                       kvmppc_set_gpr(vcpu, ra, ea);
                        break;
 
                case OP_31_XOP_MFSPR:
@@ -217,47 +224,49 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 
                        switch (sprn) {
                        case SPRN_SRR0:
-                               vcpu->arch.gpr[rt] = vcpu->arch.srr0; break;
+                               kvmppc_set_gpr(vcpu, rt, vcpu->arch.srr0); break;
                        case SPRN_SRR1:
-                               vcpu->arch.gpr[rt] = vcpu->arch.srr1; break;
+                               kvmppc_set_gpr(vcpu, rt, vcpu->arch.srr1); break;
                        case SPRN_PVR:
-                               vcpu->arch.gpr[rt] = vcpu->arch.pvr; break;
+                               kvmppc_set_gpr(vcpu, rt, vcpu->arch.pvr); break;
                        case SPRN_PIR:
-                               vcpu->arch.gpr[rt] = vcpu->vcpu_id; break;
+                               kvmppc_set_gpr(vcpu, rt, vcpu->vcpu_id); break;
                        case SPRN_MSSSR0:
-                               vcpu->arch.gpr[rt] = 0; break;
+                               kvmppc_set_gpr(vcpu, rt, 0); break;
 
                        /* Note: mftb and TBRL/TBWL are user-accessible, so
                         * the guest can always access the real TB anyways.
                         * In fact, we probably will never see these traps. */
                        case SPRN_TBWL:
-                               vcpu->arch.gpr[rt] = get_tb() >> 32; break;
+                               kvmppc_set_gpr(vcpu, rt, get_tb() >> 32); break;
                        case SPRN_TBWU:
-                               vcpu->arch.gpr[rt] = get_tb(); break;
+                               kvmppc_set_gpr(vcpu, rt, get_tb()); break;
 
                        case SPRN_SPRG0:
-                               vcpu->arch.gpr[rt] = vcpu->arch.sprg0; break;
+                               kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg0); break;
                        case SPRN_SPRG1:
-                               vcpu->arch.gpr[rt] = vcpu->arch.sprg1; break;
+                               kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg1); break;
                        case SPRN_SPRG2:
-                               vcpu->arch.gpr[rt] = vcpu->arch.sprg2; break;
+                               kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg2); break;
                        case SPRN_SPRG3:
-                               vcpu->arch.gpr[rt] = vcpu->arch.sprg3; break;
+                               kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg3); break;
                        /* Note: SPRG4-7 are user-readable, so we don't get
                         * a trap. */
 
                        case SPRN_DEC:
                        {
                                u64 jd = get_tb() - vcpu->arch.dec_jiffies;
-                               vcpu->arch.gpr[rt] = vcpu->arch.dec - jd;
-                               pr_debug(KERN_INFO "mfDEC: %x - %llx = %lx\n", vcpu->arch.dec, jd, vcpu->arch.gpr[rt]);
+                               kvmppc_set_gpr(vcpu, rt, vcpu->arch.dec - jd);
+                               pr_debug(KERN_INFO "mfDEC: %x - %llx = %lx\n",
+                                        vcpu->arch.dec, jd,
+                                        kvmppc_get_gpr(vcpu, rt));
                                break;
                        }
                        default:
                                emulated = kvmppc_core_emulate_mfspr(vcpu, sprn, rt);
                                if (emulated == EMULATE_FAIL) {
                                        printk("mfspr: unknown spr %x\n", sprn);
-                                       vcpu->arch.gpr[rt] = 0;
+                                       kvmppc_set_gpr(vcpu, rt, 0);
                                }
                                break;
                        }
@@ -269,7 +278,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
                        rb = get_rb(inst);
 
                        emulated = kvmppc_handle_store(run, vcpu,
-                                                      vcpu->arch.gpr[rs],
+                                                      kvmppc_get_gpr(vcpu, rs),
                                                       2, 1);
                        break;
 
@@ -278,14 +287,14 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
                        ra = get_ra(inst);
                        rb = get_rb(inst);
 
-                       ea = vcpu->arch.gpr[rb];
+                       ea = kvmppc_get_gpr(vcpu, rb);
                        if (ra)
-                               ea += vcpu->arch.gpr[ra];
+                               ea += kvmppc_get_gpr(vcpu, ra);
 
                        emulated = kvmppc_handle_store(run, vcpu,
-                                                      vcpu->arch.gpr[rs],
+                                                      kvmppc_get_gpr(vcpu, rs),
                                                       2, 1);
-                       vcpu->arch.gpr[ra] = ea;
+                       kvmppc_set_gpr(vcpu, ra, ea);
                        break;
 
                case OP_31_XOP_MTSPR:
@@ -293,9 +302,9 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
                        rs = get_rs(inst);
                        switch (sprn) {
                        case SPRN_SRR0:
-                               vcpu->arch.srr0 = vcpu->arch.gpr[rs]; break;
+                               vcpu->arch.srr0 = kvmppc_get_gpr(vcpu, rs); break;
                        case SPRN_SRR1:
-                               vcpu->arch.srr1 = vcpu->arch.gpr[rs]; break;
+                               vcpu->arch.srr1 = kvmppc_get_gpr(vcpu, rs); break;
 
                        /* XXX We need to context-switch the timebase for
                         * watchdog and FIT. */
@@ -305,18 +314,18 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
                        case SPRN_MSSSR0: break;
 
                        case SPRN_DEC:
-                               vcpu->arch.dec = vcpu->arch.gpr[rs];
+                               vcpu->arch.dec = kvmppc_get_gpr(vcpu, rs);
                                kvmppc_emulate_dec(vcpu);
                                break;
 
                        case SPRN_SPRG0:
-                               vcpu->arch.sprg0 = vcpu->arch.gpr[rs]; break;
+                               vcpu->arch.sprg0 = kvmppc_get_gpr(vcpu, rs); break;
                        case SPRN_SPRG1:
-                               vcpu->arch.sprg1 = vcpu->arch.gpr[rs]; break;
+                               vcpu->arch.sprg1 = kvmppc_get_gpr(vcpu, rs); break;
                        case SPRN_SPRG2:
-                               vcpu->arch.sprg2 = vcpu->arch.gpr[rs]; break;
+                               vcpu->arch.sprg2 = kvmppc_get_gpr(vcpu, rs); break;
                        case SPRN_SPRG3:
-                               vcpu->arch.sprg3 = vcpu->arch.gpr[rs]; break;
+                               vcpu->arch.sprg3 = kvmppc_get_gpr(vcpu, rs); break;
 
                        default:
                                emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, rs);
@@ -348,7 +357,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
                        rb = get_rb(inst);
 
                        emulated = kvmppc_handle_store(run, vcpu,
-                                                      vcpu->arch.gpr[rs],
+                                                      kvmppc_get_gpr(vcpu, rs),
                                                       4, 0);
                        break;
 
@@ -363,7 +372,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
                        rb = get_rb(inst);
 
                        emulated = kvmppc_handle_store(run, vcpu,
-                                                      vcpu->arch.gpr[rs],
+                                                      kvmppc_get_gpr(vcpu, rs),
                                                       2, 0);
                        break;
 
@@ -382,7 +391,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
                ra = get_ra(inst);
                rt = get_rt(inst);
                emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
-               vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
+               kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
                break;
 
        case OP_LBZ:
@@ -394,35 +403,39 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
                ra = get_ra(inst);
                rt = get_rt(inst);
                emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
-               vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
+               kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
                break;
 
        case OP_STW:
                rs = get_rs(inst);
-               emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
+               emulated = kvmppc_handle_store(run, vcpu,
+                                              kvmppc_get_gpr(vcpu, rs),
                                               4, 1);
                break;
 
        case OP_STWU:
                ra = get_ra(inst);
                rs = get_rs(inst);
-               emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
+               emulated = kvmppc_handle_store(run, vcpu,
+                                              kvmppc_get_gpr(vcpu, rs),
                                               4, 1);
-               vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
+               kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
                break;
 
        case OP_STB:
                rs = get_rs(inst);
-               emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
+               emulated = kvmppc_handle_store(run, vcpu,
+                                              kvmppc_get_gpr(vcpu, rs),
                                               1, 1);
                break;
 
        case OP_STBU:
                ra = get_ra(inst);
                rs = get_rs(inst);
-               emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
+               emulated = kvmppc_handle_store(run, vcpu,
+                                              kvmppc_get_gpr(vcpu, rs),
                                               1, 1);
-               vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
+               kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
                break;
 
        case OP_LHZ:
@@ -434,21 +447,23 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
                ra = get_ra(inst);
                rt = get_rt(inst);
                emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
-               vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
+               kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
                break;
 
        case OP_STH:
                rs = get_rs(inst);
-               emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
+               emulated = kvmppc_handle_store(run, vcpu,
+                                              kvmppc_get_gpr(vcpu, rs),
                                               2, 1);
                break;
 
        case OP_STHU:
                ra = get_ra(inst);
                rs = get_rs(inst);
-               emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
+               emulated = kvmppc_handle_store(run, vcpu,
+                                              kvmppc_get_gpr(vcpu, rs),
                                               2, 1);
-               vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
+               kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
                break;
 
        default:
@@ -461,6 +476,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
                        advance = 0;
                        printk(KERN_ERR "Couldn't emulate instruction 0x%08x "
                               "(op %d xop %d)\n", inst, get_op(inst), get_xop(inst));
+                       kvmppc_core_queue_program(vcpu, 0);
                }
        }
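
The emulation switch above keys off the primary opcode and register fields of the trapped instruction through get_op()/get_xop()/get_rt()/get_ra()/get_rb(). For reference, a stand-alone sketch of that field extraction using the standard PowerPC encoding (the in-tree helpers live in asm/disassemble.h; the bodies here are written out for illustration):

#include <stdio.h>

/* PowerPC instruction fields (IBM big-endian bit numbering): primary
 * opcode in bits 0-5, RT/RS in 6-10, RA in 11-15, RB in 16-20, extended
 * opcode in 21-30 for the OP_31 family. */
#define get_op(inst)	(((inst) >> 26) & 0x3f)
#define get_xop(inst)	(((inst) >> 1) & 0x3ff)
#define get_rt(inst)	(((inst) >> 21) & 0x1f)
#define get_ra(inst)	(((inst) >> 16) & 0x1f)
#define get_rb(inst)	(((inst) >> 11) & 0x1f)

int main(void)
{
	unsigned int inst = 0x7c85202e;	/* lwzx r4, r5, r4 */

	printf("op=%u xop=%u rt=%u ra=%u rb=%u\n", get_op(inst),
	       get_xop(inst), get_rt(inst), get_ra(inst), get_rb(inst));
	return 0;
}
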
 
index f06cf93b178ec2d037a39eeee202b567a42d6eb2..51aedd7f16bcb14d5d2f073da2fc55a53ec18575 100644 (file)
@@ -137,6 +137,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 {
        kvmppc_free_vcpus(kvm);
        kvm_free_physmem(kvm);
+       cleanup_srcu_struct(&kvm->srcu);
        kfree(kvm);
 }
 
@@ -165,14 +166,24 @@ long kvm_arch_dev_ioctl(struct file *filp,
        return -EINVAL;
 }
 
-int kvm_arch_set_memory_region(struct kvm *kvm,
-                               struct kvm_userspace_memory_region *mem,
-                               struct kvm_memory_slot old,
-                               int user_alloc)
+int kvm_arch_prepare_memory_region(struct kvm *kvm,
+                                   struct kvm_memory_slot *memslot,
+                                   struct kvm_memory_slot old,
+                                   struct kvm_userspace_memory_region *mem,
+                                   int user_alloc)
 {
        return 0;
 }
 
+void kvm_arch_commit_memory_region(struct kvm *kvm,
+               struct kvm_userspace_memory_region *mem,
+               struct kvm_memory_slot old,
+               int user_alloc)
+{
+       return;
+}
+
+
 void kvm_arch_flush_shadow(struct kvm *kvm)
 {
 }
@@ -260,34 +271,35 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
 static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu,
                                      struct kvm_run *run)
 {
-       ulong *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
-       *gpr = run->dcr.data;
+       kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, run->dcr.data);
 }
 
 static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
                                       struct kvm_run *run)
 {
-       ulong *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
+       ulong gpr;
 
-       if (run->mmio.len > sizeof(*gpr)) {
+       if (run->mmio.len > sizeof(gpr)) {
                printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len);
                return;
        }
 
        if (vcpu->arch.mmio_is_bigendian) {
                switch (run->mmio.len) {
-               case 4: *gpr = *(u32 *)run->mmio.data; break;
-               case 2: *gpr = *(u16 *)run->mmio.data; break;
-               case 1: *gpr = *(u8 *)run->mmio.data; break;
+               case 4: gpr = *(u32 *)run->mmio.data; break;
+               case 2: gpr = *(u16 *)run->mmio.data; break;
+               case 1: gpr = *(u8 *)run->mmio.data; break;
                }
        } else {
                /* Convert BE data from userland back to LE. */
                switch (run->mmio.len) {
-               case 4: *gpr = ld_le32((u32 *)run->mmio.data); break;
-               case 2: *gpr = ld_le16((u16 *)run->mmio.data); break;
-               case 1: *gpr = *(u8 *)run->mmio.data; break;
+               case 4: gpr = ld_le32((u32 *)run->mmio.data); break;
+               case 2: gpr = ld_le16((u16 *)run->mmio.data); break;
+               case 1: gpr = *(u8 *)run->mmio.data; break;
                }
        }
+
+       kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr);
 }
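
The rework above first assembles the MMIO result in a local variable and only then stores it through kvmppc_set_gpr(), so the register write goes through the accessor like everywhere else. The byte handling itself is unchanged: the buffer is interpreted big-endian for a normal guest access and byte-reversed for the lwbrx-style case, which the kernel code does with plain casts and ld_le32()/ld_le16() on the big-endian host. A host-independent sketch of the same conversion:

#include <stdint.h>
#include <stdio.h>

/* Interpret an MMIO data buffer either big- or little-endian, regardless
 * of host byte order. */
static uint32_t mmio_to_reg(const uint8_t *data, int len, int bigendian)
{
	uint32_t val = 0;
	int i;

	for (i = 0; i < len; i++)
		val = bigendian ? (val << 8) | data[i]
				: val | ((uint32_t)data[i] << (8 * i));
	return val;
}

int main(void)
{
	uint8_t buf[4] = { 0x12, 0x34, 0x56, 0x78 };

	printf("BE: %#x  LE: %#x\n", mmio_to_reg(buf, 4, 1),
	       mmio_to_reg(buf, 4, 0));
	return 0;
}
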
 
 int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
index 3fa0a10e4668f1ec5df6b0a892ba11b4131cd7c9..49292869a5cdbeb18cb2a3a17e46a25f68d21b55 100644 (file)
@@ -242,6 +242,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
        kvm_free_physmem(kvm);
        free_page((unsigned long)(kvm->arch.sca));
        debug_unregister(kvm->arch.dbf);
+       cleanup_srcu_struct(&kvm->srcu);
        kfree(kvm);
 }
 
@@ -690,14 +691,12 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 }
 
 /* Section: memory related */
-int kvm_arch_set_memory_region(struct kvm *kvm,
-                               struct kvm_userspace_memory_region *mem,
-                               struct kvm_memory_slot old,
-                               int user_alloc)
+int kvm_arch_prepare_memory_region(struct kvm *kvm,
+                                  struct kvm_memory_slot *memslot,
+                                  struct kvm_memory_slot old,
+                                  struct kvm_userspace_memory_region *mem,
+                                  int user_alloc)
 {
-       int i;
-       struct kvm_vcpu *vcpu;
-
        /* A few sanity checks. We can have exactly one memory slot which has
           to start at guest virtual zero and which has to be located at a
           page boundary in userland and which has to end at a page boundary.
@@ -720,14 +719,23 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
        if (!user_alloc)
                return -EINVAL;
 
+       return 0;
+}
+
+void kvm_arch_commit_memory_region(struct kvm *kvm,
+                               struct kvm_userspace_memory_region *mem,
+                               struct kvm_memory_slot old,
+                               int user_alloc)
+{
+       int i;
+       struct kvm_vcpu *vcpu;
+
        /* request update of sie control block for all available vcpus */
        kvm_for_each_vcpu(i, vcpu, kvm) {
                if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
                        continue;
                kvm_s390_inject_sigp_stop(vcpu, ACTION_RELOADVCPU_ON_STOP);
        }
-
-       return 0;
 }
 
 void kvm_arch_flush_shadow(struct kvm *kvm)
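
Both the powerpc and the s390 hunks split the old kvm_arch_set_memory_region() into a prepare and a commit half. The point is on the generic side: common code can validate and allocate in prepare, publish the new memslot array under SRCU, and only then let the architecture react to the now-visible layout. A simplified sketch of the calling sequence expected in virt/kvm/kvm_main.c (error handling and the actual slot copying omitted; build_new_memslots() is an illustrative placeholder, not an in-tree helper):

int __kvm_set_memory_region(struct kvm *kvm,
			    struct kvm_userspace_memory_region *mem,
			    int user_alloc)
{
	struct kvm_memory_slot old, new;
	struct kvm_memslots *slots;
	int r;

	/* ... validate mem and build 'new' from 'old' ... */

	r = kvm_arch_prepare_memory_region(kvm, &new, old, mem, user_alloc);
	if (r)
		return r;

	/* publish the updated slot array; readers hold kvm->srcu */
	slots = build_new_memslots(kvm, &new);
	rcu_assign_pointer(kvm->memslots, slots);
	synchronize_srcu_expedited(&kvm->srcu);

	/* architecture reacts once the new layout is visible */
	kvm_arch_commit_memory_region(kvm, mem, old, user_alloc);
	return 0;
}
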
index 06cce8285ba01883ee2e4f97a126ff6b50ea11d5..60f09ab3672c9d26453395f70addbb45f6a77d6c 100644 (file)
@@ -67,10 +67,14 @@ static inline long kvm_s390_vcpu_get_memsize(struct kvm_vcpu *vcpu)
 
 static inline void kvm_s390_vcpu_set_mem(struct kvm_vcpu *vcpu)
 {
+       int idx;
        struct kvm_memory_slot *mem;
+       struct kvm_memslots *memslots;
 
-       down_read(&vcpu->kvm->slots_lock);
-       mem = &vcpu->kvm->memslots[0];
+       idx = srcu_read_lock(&vcpu->kvm->srcu);
+       memslots = rcu_dereference(vcpu->kvm->memslots);
+
+       mem = &memslots->memslots[0];
 
        vcpu->arch.sie_block->gmsor = mem->userspace_addr;
        vcpu->arch.sie_block->gmslm =
@@ -78,7 +82,7 @@ static inline void kvm_s390_vcpu_set_mem(struct kvm_vcpu *vcpu)
                (mem->npages << PAGE_SHIFT) +
                VIRTIODESCSPACE - 1ul;
 
-       up_read(&vcpu->kvm->slots_lock);
+       srcu_read_unlock(&vcpu->kvm->srcu, idx);
 }
 
 /* implemented in priv.c */
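
The kvm-s390.h hunk above is the read side of the same change: the coarse slots_lock is replaced by an SRCU read-side critical section around an rcu_dereference() of the memslot array, matching the publish/synchronize sequence sketched for the generic code.
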
index 9f828f87ca35f418d24d4b7674477f643eea773d..493092efaa3bb67034a535634e15043660881be9 100644 (file)
@@ -11,6 +11,7 @@ header-y += sigcontext32.h
 header-y += ucontext.h
 header-y += processor-flags.h
 header-y += hw_breakpoint.h
+header-y += hyperv.h
 
 unifdef-y += e820.h
 unifdef-y += ist.h
diff --git a/arch/x86/include/asm/hyperv.h b/arch/x86/include/asm/hyperv.h
new file mode 100644 (file)
index 0000000..e153a2b
--- /dev/null
@@ -0,0 +1,186 @@
+#ifndef _ASM_X86_KVM_HYPERV_H
+#define _ASM_X86_KVM_HYPERV_H
+
+#include <linux/types.h>
+
+/*
+ * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent
+ * is set by CPUID(HvCpuIdFunctionVersionAndFeatures).
+ */
+#define HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS  0x40000000
+#define HYPERV_CPUID_INTERFACE                 0x40000001
+#define HYPERV_CPUID_VERSION                   0x40000002
+#define HYPERV_CPUID_FEATURES                  0x40000003
+#define HYPERV_CPUID_ENLIGHTMENT_INFO          0x40000004
+#define HYPERV_CPUID_IMPLEMENT_LIMITS          0x40000005
+
+/*
+ * Feature identification. EAX indicates which features are available
+ * to the partition based upon the current partition privileges.
+ */
+
+/* VP Runtime (HV_X64_MSR_VP_RUNTIME) available */
+#define HV_X64_MSR_VP_RUNTIME_AVAILABLE                (1 << 0)
+/* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
+#define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE    (1 << 1)
+/*
+ * Basic SynIC MSRs (HV_X64_MSR_SCONTROL through HV_X64_MSR_EOM
+ * and HV_X64_MSR_SINT0 through HV_X64_MSR_SINT15) available
+ */
+#define HV_X64_MSR_SYNIC_AVAILABLE             (1 << 2)
+/*
+ * Synthetic Timer MSRs (HV_X64_MSR_STIMER0_CONFIG through
+ * HV_X64_MSR_STIMER3_COUNT) available
+ */
+#define HV_X64_MSR_SYNTIMER_AVAILABLE          (1 << 3)
+/*
+ * APIC access MSRs (HV_X64_MSR_EOI, HV_X64_MSR_ICR and HV_X64_MSR_TPR)
+ * are available
+ */
+#define HV_X64_MSR_APIC_ACCESS_AVAILABLE       (1 << 4)
+/* Hypercall MSRs (HV_X64_MSR_GUEST_OS_ID and HV_X64_MSR_HYPERCALL) available*/
+#define HV_X64_MSR_HYPERCALL_AVAILABLE         (1 << 5)
+/* Access virtual processor index MSR (HV_X64_MSR_VP_INDEX) available*/
+#define HV_X64_MSR_VP_INDEX_AVAILABLE          (1 << 6)
+/* Virtual system reset MSR (HV_X64_MSR_RESET) is available*/
+#define HV_X64_MSR_RESET_AVAILABLE             (1 << 7)
+ /*
+  * Access statistics pages MSRs (HV_X64_MSR_STATS_PARTITION_RETAIL_PAGE,
+  * HV_X64_MSR_STATS_PARTITION_INTERNAL_PAGE, HV_X64_MSR_STATS_VP_RETAIL_PAGE,
+  * HV_X64_MSR_STATS_VP_INTERNAL_PAGE) available
+  */
+#define HV_X64_MSR_STAT_PAGES_AVAILABLE                (1 << 8)
+
+/*
+ * Feature identification: EBX indicates which flags were specified at
+ * partition creation. The format is the same as the partition creation
+ * flag structure defined in section Partition Creation Flags.
+ */
+#define HV_X64_CREATE_PARTITIONS               (1 << 0)
+#define HV_X64_ACCESS_PARTITION_ID             (1 << 1)
+#define HV_X64_ACCESS_MEMORY_POOL              (1 << 2)
+#define HV_X64_ADJUST_MESSAGE_BUFFERS          (1 << 3)
+#define HV_X64_POST_MESSAGES                   (1 << 4)
+#define HV_X64_SIGNAL_EVENTS                   (1 << 5)
+#define HV_X64_CREATE_PORT                     (1 << 6)
+#define HV_X64_CONNECT_PORT                    (1 << 7)
+#define HV_X64_ACCESS_STATS                    (1 << 8)
+#define HV_X64_DEBUGGING                       (1 << 11)
+#define HV_X64_CPU_POWER_MANAGEMENT            (1 << 12)
+#define HV_X64_CONFIGURE_PROFILER              (1 << 13)
+
+/*
+ * Feature identification. EDX indicates which miscellaneous features
+ * are available to the partition.
+ */
+/* The MWAIT instruction is available (per section MONITOR / MWAIT) */
+#define HV_X64_MWAIT_AVAILABLE                         (1 << 0)
+/* Guest debugging support is available */
+#define HV_X64_GUEST_DEBUGGING_AVAILABLE               (1 << 1)
+/* Performance Monitor support is available*/
+#define HV_X64_PERF_MONITOR_AVAILABLE                  (1 << 2)
+/* Support for physical CPU dynamic partitioning events is available*/
+#define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE      (1 << 3)
+/*
+ * Support for passing hypercall input parameter block via XMM
+ * registers is available
+ */
+#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE          (1 << 4)
+/* Support for a virtual guest idle state is available */
+#define HV_X64_GUEST_IDLE_STATE_AVAILABLE              (1 << 5)
+
+/*
+ * Implementation recommendations. Indicates which behaviors the hypervisor
+ * recommends the OS implement for optimal performance.
+ */
+ /*
+  * Recommend using hypercall for address space switches rather
+  * than MOV to CR3 instruction
+  */
+#define HV_X64_MWAIT_RECOMMENDED               (1 << 0)
+/* Recommend using hypercall for local TLB flushes rather
+ * than INVLPG or MOV to CR3 instructions */
+#define HV_X64_LOCAL_TLB_FLUSH_RECOMMENDED     (1 << 1)
+/*
+ * Recommend using hypercall for remote TLB flushes rather
+ * than inter-processor interrupts
+ */
+#define HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED    (1 << 2)
+/*
+ * Recommend using MSRs for accessing APIC registers
+ * EOI, ICR and TPR rather than their memory-mapped counterparts
+ */
+#define HV_X64_APIC_ACCESS_RECOMMENDED         (1 << 3)
+/* Recommend using the hypervisor-provided MSR to initiate a system RESET */
+#define HV_X64_SYSTEM_RESET_RECOMMENDED                (1 << 4)
+/*
+ * Recommend using relaxed timing for this partition. If used,
+ * the VM should disable any watchdog timeouts that rely on the
+ * timely delivery of external interrupts
+ */
+#define HV_X64_RELAXED_TIMING_RECOMMENDED      (1 << 5)
+
+/* MSR used to identify the guest OS. */
+#define HV_X64_MSR_GUEST_OS_ID                 0x40000000
+
+/* MSR used to setup pages used to communicate with the hypervisor. */
+#define HV_X64_MSR_HYPERCALL                   0x40000001
+
+/* MSR used to provide vcpu index */
+#define HV_X64_MSR_VP_INDEX                    0x40000002
+
+/* Define the virtual APIC registers */
+#define HV_X64_MSR_EOI                         0x40000070
+#define HV_X64_MSR_ICR                         0x40000071
+#define HV_X64_MSR_TPR                         0x40000072
+#define HV_X64_MSR_APIC_ASSIST_PAGE            0x40000073
+
+/* Define synthetic interrupt controller model specific registers. */
+#define HV_X64_MSR_SCONTROL                    0x40000080
+#define HV_X64_MSR_SVERSION                    0x40000081
+#define HV_X64_MSR_SIEFP                       0x40000082
+#define HV_X64_MSR_SIMP                                0x40000083
+#define HV_X64_MSR_EOM                         0x40000084
+#define HV_X64_MSR_SINT0                       0x40000090
+#define HV_X64_MSR_SINT1                       0x40000091
+#define HV_X64_MSR_SINT2                       0x40000092
+#define HV_X64_MSR_SINT3                       0x40000093
+#define HV_X64_MSR_SINT4                       0x40000094
+#define HV_X64_MSR_SINT5                       0x40000095
+#define HV_X64_MSR_SINT6                       0x40000096
+#define HV_X64_MSR_SINT7                       0x40000097
+#define HV_X64_MSR_SINT8                       0x40000098
+#define HV_X64_MSR_SINT9                       0x40000099
+#define HV_X64_MSR_SINT10                      0x4000009A
+#define HV_X64_MSR_SINT11                      0x4000009B
+#define HV_X64_MSR_SINT12                      0x4000009C
+#define HV_X64_MSR_SINT13                      0x4000009D
+#define HV_X64_MSR_SINT14                      0x4000009E
+#define HV_X64_MSR_SINT15                      0x4000009F
+
+
+#define HV_X64_MSR_HYPERCALL_ENABLE            0x00000001
+#define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT        12
+#define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK \
+               (~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1))
+
+/* Declare the various hypercall operations. */
+#define HV_X64_HV_NOTIFY_LONG_SPIN_WAIT                0x0008
+
+#define HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE             0x00000001
+#define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT      12
+#define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK       \
+               (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
+
+#define HV_PROCESSOR_POWER_STATE_C0            0
+#define HV_PROCESSOR_POWER_STATE_C1            1
+#define HV_PROCESSOR_POWER_STATE_C2            2
+#define HV_PROCESSOR_POWER_STATE_C3            3
+
+/* hypercall status code */
+#define HV_STATUS_SUCCESS                      0
+#define HV_STATUS_INVALID_HYPERCALL_CODE       2
+#define HV_STATUS_INVALID_HYPERCALL_INPUT      3
+#define HV_STATUS_INVALID_ALIGNMENT            4
+
+#endif
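
The new header only carries constants; their consumer is guest-side code that discovers the Hyper-V interface through CPUID before touching any of the synthetic MSRs. A user-space sketch of that probe (leaf numbers as defined above; the "Hv#1" interface signature and the hypervisor-present bit in CPUID.1:ECX[31] are conventions of the Hyper-V spec, not of this header):

#include <cpuid.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;
	char vendor[13];

	__cpuid(1, eax, ebx, ecx, edx);
	if (!(ecx & (1u << 31))) {		/* hypervisor-present bit */
		puts("no hypervisor detected");
		return 0;
	}

	__cpuid(0x40000000, eax, ebx, ecx, edx);	/* VENDOR_AND_MAX_FUNCTIONS */
	memcpy(vendor + 0, &ebx, 4);
	memcpy(vendor + 4, &ecx, 4);
	memcpy(vendor + 8, &edx, 4);
	vendor[12] = '\0';

	__cpuid(0x40000001, eax, ebx, ecx, edx);	/* HYPERV_CPUID_INTERFACE */
	printf("vendor \"%s\", interface %s\n", vendor,
	       eax == 0x31237648 ? "Hv#1" : "other");	/* "Hv#1" little-endian */
	return 0;
}
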
index 7c18e1230f5490f1f7a58bd103302ffcb8bf20b1..7a6f54fa13ba426d4cca30862cc38e5e8e4a53cc 100644 (file)
@@ -54,13 +54,23 @@ struct x86_emulate_ctxt;
 struct x86_emulate_ops {
        /*
         * read_std: Read bytes of standard (non-emulated/special) memory.
-        *           Used for instruction fetch, stack operations, and others.
+        *           Used for descriptor reading.
         *  @addr:  [IN ] Linear address from which to read.
         *  @val:   [OUT] Value read from memory, zero-extended to 'u_long'.
         *  @bytes: [IN ] Number of bytes to read from memory.
         */
        int (*read_std)(unsigned long addr, void *val,
-                       unsigned int bytes, struct kvm_vcpu *vcpu);
+                       unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error);
+
+       /*
+        * fetch: Read bytes of standard (non-emulated/special) memory.
+        *        Used for instruction fetch.
+        *  @addr:  [IN ] Linear address from which to read.
+        *  @val:   [OUT] Value read from memory, zero-extended to 'u_long'.
+        *  @bytes: [IN ] Number of bytes to read from memory.
+        */
+       int (*fetch)(unsigned long addr, void *val,
+                       unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error);
 
        /*
         * read_emulated: Read bytes from emulated/special memory area.
@@ -74,7 +84,7 @@ struct x86_emulate_ops {
                             struct kvm_vcpu *vcpu);
 
        /*
-        * write_emulated: Read bytes from emulated/special memory area.
+        * write_emulated: Write bytes to emulated/special memory area.
         *  @addr:  [IN ] Linear address to which to write.
         *  @val:   [IN ] Value to write to memory (low-order bytes used as
         *                required).
@@ -168,6 +178,7 @@ struct x86_emulate_ctxt {
 
 /* Execution mode, passed to the emulator. */
 #define X86EMUL_MODE_REAL     0        /* Real mode.             */
+#define X86EMUL_MODE_VM86     1        /* Virtual 8086 mode.     */
 #define X86EMUL_MODE_PROT16   2        /* 16-bit protected mode. */
 #define X86EMUL_MODE_PROT32   4        /* 32-bit protected mode. */
 #define X86EMUL_MODE_PROT64   8        /* 64-bit (long) mode.    */
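
The ops table now distinguishes instruction fetches from ordinary reads and threads a fault code back through the new *error argument. A rough sketch of how an emulator core can use the two callbacks (helper names are illustrative, not the in-tree ones):

static int fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
			   struct x86_emulate_ops *ops,
			   unsigned long addr, u8 *byte, u32 *err)
{
	/* instruction bytes go through ->fetch() so the MMU can apply
	 * execute-permission checks and report a #PF error code in *err */
	return ops->fetch(addr, byte, 1, ctxt->vcpu, err);
}

static int read_descriptor_entry(struct x86_emulate_ctxt *ctxt,
				 struct x86_emulate_ops *ops,
				 unsigned long dt_base, u16 selector,
				 struct desc_struct *desc, u32 *err)
{
	/* descriptor-table reads keep using ->read_std() */
	return ops->read_std(dt_base + (selector & ~7), desc,
			     sizeof(*desc), ctxt->vcpu, err);
}
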
index 4f865e8b854096565f5644710b44a6765ce242cc..06d9e79ca37dccad56acb3050ee837c8156ff414 100644 (file)
@@ -25,7 +25,7 @@
 #include <asm/mtrr.h>
 #include <asm/msr-index.h>
 
-#define KVM_MAX_VCPUS 16
+#define KVM_MAX_VCPUS 64
 #define KVM_MEMORY_SLOTS 32
 /* memory slots that does not exposed to userspace */
 #define KVM_PRIVATE_MEM_SLOTS 4
 #define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS |   \
                                  0xFFFFFF0000000000ULL)
 
-#define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST                          \
-       (X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD)
-#define KVM_GUEST_CR0_MASK                                             \
-       (KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
-#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST                                \
-       (X86_CR0_WP | X86_CR0_NE | X86_CR0_TS | X86_CR0_MP)
-#define KVM_VM_CR0_ALWAYS_ON                                           \
-       (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
-#define KVM_GUEST_CR4_MASK                                             \
-       (X86_CR4_VME | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_PGE | X86_CR4_VMXE)
-#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
-#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
-
 #define INVALID_PAGE (~(hpa_t)0)
 #define UNMAPPED_GVA (~(gpa_t)0)
 
@@ -256,7 +243,8 @@ struct kvm_mmu {
        void (*new_cr3)(struct kvm_vcpu *vcpu);
        int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err);
        void (*free)(struct kvm_vcpu *vcpu);
-       gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva);
+       gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access,
+                           u32 *error);
        void (*prefetch_page)(struct kvm_vcpu *vcpu,
                              struct kvm_mmu_page *page);
        int (*sync_page)(struct kvm_vcpu *vcpu,
@@ -282,13 +270,15 @@ struct kvm_vcpu_arch {
        u32 regs_dirty;
 
        unsigned long cr0;
+       unsigned long cr0_guest_owned_bits;
        unsigned long cr2;
        unsigned long cr3;
        unsigned long cr4;
+       unsigned long cr4_guest_owned_bits;
        unsigned long cr8;
        u32 hflags;
        u64 pdptrs[4]; /* pae */
-       u64 shadow_efer;
+       u64 efer;
        u64 apic_base;
        struct kvm_lapic *apic;    /* kernel irqchip context */
        int32_t apic_arb_prio;
@@ -374,17 +364,27 @@ struct kvm_vcpu_arch {
        /* used for guest single stepping over the given code position */
        u16 singlestep_cs;
        unsigned long singlestep_rip;
+       /* fields used by HYPER-V emulation */
+       u64 hv_vapic;
 };
 
 struct kvm_mem_alias {
        gfn_t base_gfn;
        unsigned long npages;
        gfn_t target_gfn;
+#define KVM_ALIAS_INVALID     1UL
+       unsigned long flags;
 };
 
-struct kvm_arch{
-       int naliases;
+#define KVM_ARCH_HAS_UNALIAS_INSTANTIATION
+
+struct kvm_mem_aliases {
        struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS];
+       int naliases;
+};
+
+struct kvm_arch {
+       struct kvm_mem_aliases *aliases;
 
        unsigned int n_free_mmu_pages;
        unsigned int n_requested_mmu_pages;
@@ -416,6 +416,10 @@ struct kvm_arch{
        s64 kvmclock_offset;
 
        struct kvm_xen_hvm_config xen_hvm_config;
+
+       /* fields used by HYPER-V emulation */
+       u64 hv_guest_os_id;
+       u64 hv_hypercall;
 };
 
 struct kvm_vm_stat {
@@ -471,6 +475,7 @@ struct kvm_x86_ops {
        int (*hardware_setup)(void);               /* __init */
        void (*hardware_unsetup)(void);            /* __exit */
        bool (*cpu_has_accelerated_tpr)(void);
+       void (*cpuid_update)(struct kvm_vcpu *vcpu);
 
        /* Create, but do not attach this VCPU */
        struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id);
@@ -492,6 +497,7 @@ struct kvm_x86_ops {
        void (*set_segment)(struct kvm_vcpu *vcpu,
                            struct kvm_segment *var, int seg);
        void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
+       void (*decache_cr0_guest_bits)(struct kvm_vcpu *vcpu);
        void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu);
        void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
        void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
@@ -501,12 +507,13 @@ struct kvm_x86_ops {
        void (*set_idt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt);
        void (*get_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt);
        void (*set_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt);
-       unsigned long (*get_dr)(struct kvm_vcpu *vcpu, int dr);
-       void (*set_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long value,
-                      int *exception);
+       int (*get_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long *dest);
+       int (*set_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long value);
        void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
        unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
        void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
+       void (*fpu_activate)(struct kvm_vcpu *vcpu);
+       void (*fpu_deactivate)(struct kvm_vcpu *vcpu);
 
        void (*tlb_flush)(struct kvm_vcpu *vcpu);
 
@@ -531,7 +538,8 @@ struct kvm_x86_ops {
        int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
        int (*get_tdp_level)(void);
        u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
-       bool (*gb_page_enable)(void);
+       int (*get_lpage_level)(void);
+       bool (*rdtscp_supported)(void);
 
        const struct trace_print_flags *exit_reasons_str;
 };
@@ -606,8 +614,7 @@ int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr,
                    unsigned long value);
 
 void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
-int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
-                               int type_bits, int seg);
+int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg);
 
 int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason);
 
@@ -653,6 +660,10 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
 int kvm_mmu_load(struct kvm_vcpu *vcpu);
 void kvm_mmu_unload(struct kvm_vcpu *vcpu);
 void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
+gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error);
+gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, u32 *error);
+gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, u32 *error);
+gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, u32 *error);
 
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
 
@@ -666,6 +677,7 @@ void kvm_disable_tdp(void);
 
 int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
 int complete_pio(struct kvm_vcpu *vcpu);
+bool kvm_check_iopl(struct kvm_vcpu *vcpu);
 
 struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn);
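
The four new kvm_mmu_gva_to_gpa_* helpers pair with the gva_to_gpa() callback change above: instead of a bare translation, the caller states what kind of access it is making and receives a #PF-style error code back. A sketch of the write variant (structure assumed from the surrounding mmu code; the PFERR_* masks are the usual page-fault error-code bits):

gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
{
	u32 access = PFERR_WRITE_MASK;

	/* a CPL 3 access must also pass the user/supervisor check */
	if (kvm_x86_ops->get_cpl(vcpu) == 3)
		access |= PFERR_USER_MASK;

	return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error);
}
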
 
index c584076a47f48acf8d77d57b5a610abf4e366bfa..ffae1420e7d76d0b549360cec1b3b93071bc867a 100644 (file)
@@ -2,6 +2,7 @@
 #define _ASM_X86_KVM_PARA_H
 
 #include <linux/types.h>
+#include <asm/hyperv.h>
 
 /* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx.  It
  * should be used to determine that a VM is running under KVM.
index 1fecb7e6113038f52c2f51f8fde2ef419d40c6e7..38638cd2fa4c87997dec931e0bcdd4f49f6fb28a 100644 (file)
@@ -313,7 +313,7 @@ struct __attribute__ ((__packed__)) vmcb {
 
 #define SVM_EXIT_ERR           -1
 
-#define SVM_CR0_SELECTIVE_MASK (1 << 3 | 1) /* TS and MP */
+#define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP)
 
 #define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda"
 #define SVM_VMRUN  ".byte 0x0f, 0x01, 0xd8"
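
Beyond readability, the switch to named bits changes the value: with the x86 CR0 layout (PE is bit 0, MP bit 1, TS bit 3) the old literal (1 << 3 | 1) evaluates to 0x9, i.e. TS | PE, which never matched its own "TS and MP" comment, while the named form gives 0xa. A two-line check:

#include <stdio.h>

#define X86_CR0_PE (1UL << 0)
#define X86_CR0_MP (1UL << 1)
#define X86_CR0_TS (1UL << 3)

int main(void)
{
	printf("old mask: %#lx\n", (unsigned long)(1 << 3 | 1));	/* 0x9: TS | PE */
	printf("new mask: %#lx\n", X86_CR0_TS | X86_CR0_MP);		/* 0xa: TS | MP */
	return 0;
}
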
index 2b4945419a84bd16681c7b4d579ae507ccc8ba96..fb9a080740ecf48970e9648c85caac34bfaf09c6 100644 (file)
@@ -53,6 +53,7 @@
  */
 #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
 #define SECONDARY_EXEC_ENABLE_EPT               0x00000002
+#define SECONDARY_EXEC_RDTSCP                  0x00000008
 #define SECONDARY_EXEC_ENABLE_VPID              0x00000020
 #define SECONDARY_EXEC_WBINVD_EXITING          0x00000040
 #define SECONDARY_EXEC_UNRESTRICTED_GUEST      0x00000080
@@ -251,6 +252,7 @@ enum vmcs_field {
 #define EXIT_REASON_MSR_READ            31
 #define EXIT_REASON_MSR_WRITE           32
 #define EXIT_REASON_MWAIT_INSTRUCTION   36
+#define EXIT_REASON_MONITOR_INSTRUCTION 39
 #define EXIT_REASON_PAUSE_INSTRUCTION   40
 #define EXIT_REASON_MCE_DURING_VMENTRY  41
 #define EXIT_REASON_TPR_BELOW_THRESHOLD 43
@@ -362,6 +364,7 @@ enum vmcs_field {
 #define VMX_EPTP_UC_BIT                                (1ull << 8)
 #define VMX_EPTP_WB_BIT                                (1ull << 14)
 #define VMX_EPT_2MB_PAGE_BIT                   (1ull << 16)
+#define VMX_EPT_1GB_PAGE_BIT                   (1ull << 17)
 #define VMX_EPT_EXTENT_INDIVIDUAL_BIT          (1ull << 24)
 #define VMX_EPT_EXTENT_CONTEXT_BIT             (1ull << 25)
 #define VMX_EPT_EXTENT_GLOBAL_BIT              (1ull << 26)
@@ -374,7 +377,7 @@ enum vmcs_field {
 #define VMX_EPT_READABLE_MASK                  0x1ull
 #define VMX_EPT_WRITABLE_MASK                  0x2ull
 #define VMX_EPT_EXECUTABLE_MASK                        0x4ull
-#define VMX_EPT_IGMT_BIT                       (1ull << 6)
+#define VMX_EPT_IPAT_BIT                       (1ull << 6)
 
 #define VMX_EPT_IDENTITY_PAGETABLE_ADDR                0xfffbc000ul
 
index 9055e5872ff0a5afa668d1d7fa9517eeb6f618c8..1c0c6ab9c60f03394af20e57b3a09186a6cba57d 100644 (file)
@@ -301,7 +301,8 @@ static int __init vsyscall_init(void)
        register_sysctl_table(kernel_root_table2);
 #endif
        on_each_cpu(cpu_vsyscall_init, NULL, 1);
-       hotcpu_notifier(cpu_vsyscall_notifier, 0);
+       /* notifier priority > KVM */
+       hotcpu_notifier(cpu_vsyscall_notifier, 30);
        return 0;
 }
 
index 3c4d0109ad2051c71d9022c6d370a9ac47234cb0..970bbd4795161777e11b629151577160e5fc929e 100644 (file)
@@ -29,6 +29,7 @@ config KVM
        select HAVE_KVM_EVENTFD
        select KVM_APIC_ARCHITECTURE
        select USER_RETURN_NOTIFIER
+       select KVM_MMIO
        ---help---
          Support hosting fully virtualized guest machines using hardware
          virtualization extensions.  You will need a fairly recent
index 7e8faea4651e1a89dc2755f044681b11f94a78e2..4dade6ac08276537b07824ab7c23224d93946905 100644 (file)
@@ -32,7 +32,7 @@
 #include <linux/module.h>
 #include <asm/kvm_emulate.h>
 
-#include "mmu.h"               /* for is_long_mode() */
+#include "x86.h"
 
 /*
  * Opcode effective-address decode tables.
@@ -76,6 +76,8 @@
 #define GroupDual   (1<<15)     /* Alternate decoding of mod == 3 */
 #define GroupMask   0xff        /* Group number stored in bits 0:7 */
 /* Misc flags */
+#define Lock        (1<<26) /* lock prefix is allowed for the instruction */
+#define Priv        (1<<27) /* instruction generates #GP if current CPL != 0 */
 #define No64       (1<<28)
 /* Source 2 operand type */
 #define Src2None    (0<<29)
 enum {
        Group1_80, Group1_81, Group1_82, Group1_83,
        Group1A, Group3_Byte, Group3, Group4, Group5, Group7,
+       Group8, Group9,
 };
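
The Lock and Priv decode flags added above let the execute path reject malformed or privileged instructions before emulating them. A sketch of the checks they enable (helper name illustrative; the real tests sit inline in x86_emulate_insn()):

static bool decode_flags_permit(struct x86_emulate_ctxt *ctxt,
				struct decode_cache *c)
{
	/* LOCK prefix is only legal on instructions marked Lock: #UD */
	if (c->lock_prefix && !(c->d & Lock)) {
		kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
		return false;
	}

	/* Priv instructions may only run at CPL 0: #GP(0) */
	if ((c->d & Priv) && kvm_x86_ops->get_cpl(ctxt->vcpu)) {
		kvm_inject_gp(ctxt->vcpu, 0);
		return false;
	}

	return true;
}
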
 
 static u32 opcode_table[256] = {
        /* 0x00 - 0x07 */
-       ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
+       ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
        ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
        ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
        ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
        /* 0x08 - 0x0F */
-       ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
+       ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
        ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
        ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
        ImplicitOps | Stack | No64, 0,
        /* 0x10 - 0x17 */
-       ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
+       ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
        ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
        ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
        ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
        /* 0x18 - 0x1F */
-       ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
+       ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
        ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
        ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
        ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
        /* 0x20 - 0x27 */
-       ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
+       ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
        ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
        DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0,
        /* 0x28 - 0x2F */
-       ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
+       ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
        ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
        0, 0, 0, 0,
        /* 0x30 - 0x37 */
-       ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
+       ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
        ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
        0, 0, 0, 0,
        /* 0x38 - 0x3F */
@@ -156,7 +159,7 @@ static u32 opcode_table[256] = {
        Group | Group1_80, Group | Group1_81,
        Group | Group1_82, Group | Group1_83,
        ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
-       ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
+       ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
        /* 0x88 - 0x8F */
        ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov,
        ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
@@ -210,7 +213,7 @@ static u32 opcode_table[256] = {
        SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
        /* 0xF0 - 0xF7 */
        0, 0, 0, 0,
-       ImplicitOps, ImplicitOps, Group | Group3_Byte, Group | Group3,
+       ImplicitOps | Priv, ImplicitOps, Group | Group3_Byte, Group | Group3,
        /* 0xF8 - 0xFF */
        ImplicitOps, 0, ImplicitOps, ImplicitOps,
        ImplicitOps, ImplicitOps, Group | Group4, Group | Group5,
@@ -218,16 +221,20 @@ static u32 opcode_table[256] = {
 
 static u32 twobyte_table[256] = {
        /* 0x00 - 0x0F */
-       0, Group | GroupDual | Group7, 0, 0, 0, ImplicitOps, ImplicitOps, 0,
-       ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0,
+       0, Group | GroupDual | Group7, 0, 0,
+       0, ImplicitOps, ImplicitOps | Priv, 0,
+       ImplicitOps | Priv, ImplicitOps | Priv, 0, 0,
+       0, ImplicitOps | ModRM, 0, 0,
        /* 0x10 - 0x1F */
        0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0,
        /* 0x20 - 0x2F */
-       ModRM | ImplicitOps, ModRM, ModRM | ImplicitOps, ModRM, 0, 0, 0, 0,
+       ModRM | ImplicitOps | Priv, ModRM | Priv,
+       ModRM | ImplicitOps | Priv, ModRM | Priv,
+       0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        /* 0x30 - 0x3F */
-       ImplicitOps, 0, ImplicitOps, 0,
-       ImplicitOps, ImplicitOps, 0, 0,
+       ImplicitOps | Priv, 0, ImplicitOps | Priv, 0,
+       ImplicitOps, ImplicitOps | Priv, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        /* 0x40 - 0x47 */
        DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
@@ -257,21 +264,23 @@ static u32 twobyte_table[256] = {
        DstMem | SrcReg | Src2CL | ModRM, 0, 0,
        /* 0xA8 - 0xAF */
        ImplicitOps | Stack, ImplicitOps | Stack,
-       0, DstMem | SrcReg | ModRM | BitOp,
+       0, DstMem | SrcReg | ModRM | BitOp | Lock,
        DstMem | SrcReg | Src2ImmByte | ModRM,
        DstMem | SrcReg | Src2CL | ModRM,
        ModRM, 0,
        /* 0xB0 - 0xB7 */
-       ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0,
-           DstMem | SrcReg | ModRM | BitOp,
+       ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
+       0, DstMem | SrcReg | ModRM | BitOp | Lock,
        0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
            DstReg | SrcMem16 | ModRM | Mov,
        /* 0xB8 - 0xBF */
-       0, 0, DstMem | SrcImmByte | ModRM, DstMem | SrcReg | ModRM | BitOp,
+       0, 0,
+       Group | Group8, DstMem | SrcReg | ModRM | BitOp | Lock,
        0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
            DstReg | SrcMem16 | ModRM | Mov,
        /* 0xC0 - 0xCF */
-       0, 0, 0, DstMem | SrcReg | ModRM | Mov, 0, 0, 0, ImplicitOps | ModRM,
+       0, 0, 0, DstMem | SrcReg | ModRM | Mov,
+       0, 0, 0, Group | GroupDual | Group9,
        0, 0, 0, 0, 0, 0, 0, 0,
        /* 0xD0 - 0xDF */
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -283,25 +292,41 @@ static u32 twobyte_table[256] = {
 
 static u32 group_table[] = {
        [Group1_80*8] =
-       ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
-       ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
-       ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
-       ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
+       ByteOp | DstMem | SrcImm | ModRM | Lock,
+       ByteOp | DstMem | SrcImm | ModRM | Lock,
+       ByteOp | DstMem | SrcImm | ModRM | Lock,
+       ByteOp | DstMem | SrcImm | ModRM | Lock,
+       ByteOp | DstMem | SrcImm | ModRM | Lock,
+       ByteOp | DstMem | SrcImm | ModRM | Lock,
+       ByteOp | DstMem | SrcImm | ModRM | Lock,
+       ByteOp | DstMem | SrcImm | ModRM,
        [Group1_81*8] =
-       DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM,
-       DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM,
-       DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM,
-       DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM,
+       DstMem | SrcImm | ModRM | Lock,
+       DstMem | SrcImm | ModRM | Lock,
+       DstMem | SrcImm | ModRM | Lock,
+       DstMem | SrcImm | ModRM | Lock,
+       DstMem | SrcImm | ModRM | Lock,
+       DstMem | SrcImm | ModRM | Lock,
+       DstMem | SrcImm | ModRM | Lock,
+       DstMem | SrcImm | ModRM,
        [Group1_82*8] =
-       ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
-       ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
-       ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
-       ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
+       ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
+       ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
+       ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
+       ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
+       ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
+       ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
+       ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
+       ByteOp | DstMem | SrcImm | ModRM | No64,
        [Group1_83*8] =
-       DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM,
-       DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM,
-       DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM,
-       DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM,
+       DstMem | SrcImmByte | ModRM | Lock,
+       DstMem | SrcImmByte | ModRM | Lock,
+       DstMem | SrcImmByte | ModRM | Lock,
+       DstMem | SrcImmByte | ModRM | Lock,
+       DstMem | SrcImmByte | ModRM | Lock,
+       DstMem | SrcImmByte | ModRM | Lock,
+       DstMem | SrcImmByte | ModRM | Lock,
+       DstMem | SrcImmByte | ModRM,
        [Group1A*8] =
        DstMem | SrcNone | ModRM | Mov | Stack, 0, 0, 0, 0, 0, 0, 0,
        [Group3_Byte*8] =
@@ -320,24 +345,39 @@ static u32 group_table[] = {
        SrcMem | ModRM | Stack, 0,
        SrcMem | ModRM | Stack, 0, SrcMem | ModRM | Stack, 0,
        [Group7*8] =
-       0, 0, ModRM | SrcMem, ModRM | SrcMem,
+       0, 0, ModRM | SrcMem | Priv, ModRM | SrcMem | Priv,
        SrcNone | ModRM | DstMem | Mov, 0,
-       SrcMem16 | ModRM | Mov, SrcMem | ModRM | ByteOp,
+       SrcMem16 | ModRM | Mov | Priv, SrcMem | ModRM | ByteOp | Priv,
+       [Group8*8] =
+       0, 0, 0, 0,
+       DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM | Lock,
+       DstMem | SrcImmByte | ModRM | Lock, DstMem | SrcImmByte | ModRM | Lock,
+       [Group9*8] =
+       0, ImplicitOps | ModRM | Lock, 0, 0, 0, 0, 0, 0,
 };
 
 static u32 group2_table[] = {
        [Group7*8] =
-       SrcNone | ModRM, 0, 0, SrcNone | ModRM,
+       SrcNone | ModRM | Priv, 0, 0, SrcNone | ModRM,
        SrcNone | ModRM | DstMem | Mov, 0,
        SrcMem16 | ModRM | Mov, 0,
+       [Group9*8] =
+       0, 0, 0, 0, 0, 0, 0, 0,
 };
 
 /* EFLAGS bit definitions. */
+#define EFLG_ID (1<<21)
+#define EFLG_VIP (1<<20)
+#define EFLG_VIF (1<<19)
+#define EFLG_AC (1<<18)
 #define EFLG_VM (1<<17)
 #define EFLG_RF (1<<16)
+#define EFLG_IOPL (3<<12)
+#define EFLG_NT (1<<14)
 #define EFLG_OF (1<<11)
 #define EFLG_DF (1<<10)
 #define EFLG_IF (1<<9)
+#define EFLG_TF (1<<8)
 #define EFLG_SF (1<<7)
 #define EFLG_ZF (1<<6)
 #define EFLG_AF (1<<4)
@@ -606,7 +646,7 @@ static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
 
        if (linear < fc->start || linear >= fc->end) {
                size = min(15UL, PAGE_SIZE - offset_in_page(linear));
-               rc = ops->read_std(linear, fc->data, size, ctxt->vcpu);
+               rc = ops->fetch(linear, fc->data, size, ctxt->vcpu, NULL);
                if (rc)
                        return rc;
                fc->start = linear;
@@ -661,11 +701,11 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt,
                op_bytes = 3;
        *address = 0;
        rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2,
-                          ctxt->vcpu);
+                          ctxt->vcpu, NULL);
        if (rc)
                return rc;
        rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes,
-                          ctxt->vcpu);
+                          ctxt->vcpu, NULL);
        return rc;
 }
 
@@ -889,6 +929,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
 
        switch (mode) {
        case X86EMUL_MODE_REAL:
+       case X86EMUL_MODE_VM86:
        case X86EMUL_MODE_PROT16:
                def_op_bytes = def_ad_bytes = 2;
                break;
@@ -975,7 +1016,7 @@ done_prefixes:
        }
 
        if (mode == X86EMUL_MODE_PROT64 && (c->d & No64)) {
-               kvm_report_emulation_failure(ctxt->vcpu, "invalid x86/64 instruction");;
+               kvm_report_emulation_failure(ctxt->vcpu, "invalid x86/64 instruction");
                return -1;
        }
 
@@ -1196,13 +1237,56 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt,
        rc = ops->read_emulated(register_address(c, ss_base(ctxt),
                                                 c->regs[VCPU_REGS_RSP]),
                                dest, len, ctxt->vcpu);
-       if (rc != 0)
+       if (rc != X86EMUL_CONTINUE)
                return rc;
 
        register_address_increment(c, &c->regs[VCPU_REGS_RSP], len);
        return rc;
 }
 
+static int emulate_popf(struct x86_emulate_ctxt *ctxt,
+                      struct x86_emulate_ops *ops,
+                      void *dest, int len)
+{
+       int rc;
+       unsigned long val, change_mask;
+       int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
+       int cpl = kvm_x86_ops->get_cpl(ctxt->vcpu);
+
+       rc = emulate_pop(ctxt, ops, &val, len);
+       if (rc != X86EMUL_CONTINUE)
+               return rc;
+
+       change_mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_OF
+               | EFLG_TF | EFLG_DF | EFLG_NT | EFLG_RF | EFLG_AC | EFLG_ID;
+
+       switch(ctxt->mode) {
+       case X86EMUL_MODE_PROT64:
+       case X86EMUL_MODE_PROT32:
+       case X86EMUL_MODE_PROT16:
+               if (cpl == 0)
+                       change_mask |= EFLG_IOPL;
+               if (cpl <= iopl)
+                       change_mask |= EFLG_IF;
+               break;
+       case X86EMUL_MODE_VM86:
+               if (iopl < 3) {
+                       kvm_inject_gp(ctxt->vcpu, 0);
+                       return X86EMUL_PROPAGATE_FAULT;
+               }
+               change_mask |= EFLG_IF;
+               break;
+       default: /* real mode */
+               change_mask |= (EFLG_IOPL | EFLG_IF);
+               break;
+       }
+
+       *(unsigned long *)dest =
+               (ctxt->eflags & ~change_mask) | (val & change_mask);
+
+       return rc;
+}
+
 static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, int seg)
 {
        struct decode_cache *c = &ctxt->decode;
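The policy emulate_popf() enforces above can be restated compactly (a sketch, not part of the patch, reusing the EFLG_* masks defined in this file): in protected mode POPF may change IOPL only at CPL 0 and IF only when CPL <= IOPL; in vm86 mode it needs IOPL 3 or it faults; in real mode both are writable.

static unsigned long popf_change_mask(bool real_mode, bool vm86,
				      int cpl, int iopl)
{
	unsigned long mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF |
			     EFLG_OF | EFLG_TF | EFLG_DF | EFLG_NT | EFLG_RF |
			     EFLG_AC | EFLG_ID;

	if (real_mode)
		return mask | EFLG_IOPL | EFLG_IF;
	if (vm86)
		return mask | EFLG_IF;	/* only reached when IOPL == 3 */
	if (cpl == 0)
		mask |= EFLG_IOPL;
	if (cpl <= iopl)
		mask |= EFLG_IF;
	return mask;
}
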
@@ -1225,7 +1309,7 @@ static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt,
        if (rc != 0)
                return rc;
 
-       rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)selector, 1, seg);
+       rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)selector, seg);
        return rc;
 }
 
@@ -1370,7 +1454,7 @@ static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt,
        int rc;
 
        rc = ops->read_emulated(memop, &old, 8, ctxt->vcpu);
-       if (rc != 0)
+       if (rc != X86EMUL_CONTINUE)
                return rc;
 
        if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) ||
@@ -1385,7 +1469,7 @@ static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt,
                       (u32) c->regs[VCPU_REGS_RBX];
 
                rc = ops->cmpxchg_emulated(memop, &old, &new, 8, ctxt->vcpu);
-               if (rc != 0)
+               if (rc != X86EMUL_CONTINUE)
                        return rc;
                ctxt->eflags |= EFLG_ZF;
        }
@@ -1407,7 +1491,7 @@ static int emulate_ret_far(struct x86_emulate_ctxt *ctxt,
        rc = emulate_pop(ctxt, ops, &cs, c->op_bytes);
        if (rc)
                return rc;
-       rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)cs, 1, VCPU_SREG_CS);
+       rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)cs, VCPU_SREG_CS);
        return rc;
 }
 
@@ -1451,7 +1535,7 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt,
                                        &c->dst.val,
                                        c->dst.bytes,
                                        ctxt->vcpu);
-               if (rc != 0)
+               if (rc != X86EMUL_CONTINUE)
                        return rc;
                break;
        case OP_NONE:
@@ -1514,9 +1598,8 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt)
        u64 msr_data;
 
        /* syscall is not available in real mode */
-       if (c->lock_prefix || ctxt->mode == X86EMUL_MODE_REAL
-               || !(ctxt->vcpu->arch.cr0 & X86_CR0_PE))
-               return -1;
+       if (ctxt->mode == X86EMUL_MODE_REAL || ctxt->mode == X86EMUL_MODE_VM86)
+               return X86EMUL_UNHANDLEABLE;
 
        setup_syscalls_segments(ctxt, &cs, &ss);
 
@@ -1553,7 +1636,7 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt)
                ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
        }
 
-       return 0;
+       return X86EMUL_CONTINUE;
 }
 
 static int
@@ -1563,22 +1646,17 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt)
        struct kvm_segment cs, ss;
        u64 msr_data;
 
-       /* inject #UD if LOCK prefix is used */
-       if (c->lock_prefix)
-               return -1;
-
-       /* inject #GP if in real mode or paging is disabled */
-       if (ctxt->mode == X86EMUL_MODE_REAL ||
-               !(ctxt->vcpu->arch.cr0 & X86_CR0_PE)) {
+       /* inject #GP if in real mode */
+       if (ctxt->mode == X86EMUL_MODE_REAL) {
                kvm_inject_gp(ctxt->vcpu, 0);
-               return -1;
+               return X86EMUL_UNHANDLEABLE;
        }
 
        /* XXX sysenter/sysexit have not been tested in 64bit mode.
        * Therefore, we inject an #UD.
        */
        if (ctxt->mode == X86EMUL_MODE_PROT64)
-               return -1;
+               return X86EMUL_UNHANDLEABLE;
 
        setup_syscalls_segments(ctxt, &cs, &ss);
 
@@ -1587,13 +1665,13 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt)
        case X86EMUL_MODE_PROT32:
                if ((msr_data & 0xfffc) == 0x0) {
                        kvm_inject_gp(ctxt->vcpu, 0);
-                       return -1;
+                       return X86EMUL_PROPAGATE_FAULT;
                }
                break;
        case X86EMUL_MODE_PROT64:
                if (msr_data == 0x0) {
                        kvm_inject_gp(ctxt->vcpu, 0);
-                       return -1;
+                       return X86EMUL_PROPAGATE_FAULT;
                }
                break;
        }
@@ -1618,7 +1696,7 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt)
        kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data);
        c->regs[VCPU_REGS_RSP] = msr_data;
 
-       return 0;
+       return X86EMUL_CONTINUE;
 }
 
 static int
@@ -1629,21 +1707,11 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt)
        u64 msr_data;
        int usermode;
 
-       /* inject #UD if LOCK prefix is used */
-       if (c->lock_prefix)
-               return -1;
-
-       /* inject #GP if in real mode or paging is disabled */
-       if (ctxt->mode == X86EMUL_MODE_REAL
-               || !(ctxt->vcpu->arch.cr0 & X86_CR0_PE)) {
-               kvm_inject_gp(ctxt->vcpu, 0);
-               return -1;
-       }
-
-       /* sysexit must be called from CPL 0 */
-       if (kvm_x86_ops->get_cpl(ctxt->vcpu) != 0) {
+       /* inject #GP if in real mode or Virtual 8086 mode */
+       if (ctxt->mode == X86EMUL_MODE_REAL ||
+           ctxt->mode == X86EMUL_MODE_VM86) {
                kvm_inject_gp(ctxt->vcpu, 0);
-               return -1;
+               return X86EMUL_UNHANDLEABLE;
        }
 
        setup_syscalls_segments(ctxt, &cs, &ss);
@@ -1661,7 +1729,7 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt)
                cs.selector = (u16)(msr_data + 16);
                if ((msr_data & 0xfffc) == 0x0) {
                        kvm_inject_gp(ctxt->vcpu, 0);
-                       return -1;
+                       return X86EMUL_PROPAGATE_FAULT;
                }
                ss.selector = (u16)(msr_data + 24);
                break;
@@ -1669,7 +1737,7 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt)
                cs.selector = (u16)(msr_data + 32);
                if (msr_data == 0x0) {
                        kvm_inject_gp(ctxt->vcpu, 0);
-                       return -1;
+                       return X86EMUL_PROPAGATE_FAULT;
                }
                ss.selector = cs.selector + 8;
                cs.db = 0;
@@ -1685,7 +1753,58 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt)
        c->eip = ctxt->vcpu->arch.regs[VCPU_REGS_RDX];
        c->regs[VCPU_REGS_RSP] = ctxt->vcpu->arch.regs[VCPU_REGS_RCX];
 
-       return 0;
+       return X86EMUL_CONTINUE;
+}
+
+static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt)
+{
+       int iopl;
+       if (ctxt->mode == X86EMUL_MODE_REAL)
+               return false;
+       if (ctxt->mode == X86EMUL_MODE_VM86)
+               return true;
+       iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
+       return kvm_x86_ops->get_cpl(ctxt->vcpu) > iopl;
+}
+
+static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
+                                           struct x86_emulate_ops *ops,
+                                           u16 port, u16 len)
+{
+       struct kvm_segment tr_seg;
+       int r;
+       u16 io_bitmap_ptr;
+       u8 perm, bit_idx = port & 0x7;
+       unsigned mask = (1 << len) - 1;
+
+       kvm_get_segment(ctxt->vcpu, &tr_seg, VCPU_SREG_TR);
+       if (tr_seg.unusable)
+               return false;
+       if (tr_seg.limit < 103)
+               return false;
+       r = ops->read_std(tr_seg.base + 102, &io_bitmap_ptr, 2, ctxt->vcpu,
+                         NULL);
+       if (r != X86EMUL_CONTINUE)
+               return false;
+       if (io_bitmap_ptr + port/8 > tr_seg.limit)
+               return false;
+       r = ops->read_std(tr_seg.base + io_bitmap_ptr + port/8, &perm, 1,
+                         ctxt->vcpu, NULL);
+       if (r != X86EMUL_CONTINUE)
+               return false;
+       if ((perm >> bit_idx) & mask)
+               return false;
+       return true;
+}
+
+static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt,
+                                struct x86_emulate_ops *ops,
+                                u16 port, u16 len)
+{
+       if (emulator_bad_iopl(ctxt))
+               if (!emulator_io_port_access_allowed(ctxt, ops, port, len))
+                       return false;
+       return true;
 }
 
 int
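A compact sketch (not part of the patch) of the TSS I/O-permission-bitmap test that emulator_io_port_access_allowed() performs above: a len-byte access to a port is allowed only if every corresponding bit in the bitmap is clear (the helper name and standalone form are illustrative).

static bool io_bitmap_allows(const u8 *bitmap, u16 port, u16 len)
{
	unsigned int mask = (1u << len) - 1;	/* one bit per accessed port */
	u8 perm = bitmap[port / 8];		/* bitmap byte covering this port */

	return !((perm >> (port & 7)) & mask);
}
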
@@ -1709,6 +1828,18 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
        memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);
        saved_eip = c->eip;
 
+       /* LOCK prefix is allowed only with some instructions */
+       if (c->lock_prefix && !(c->d & Lock)) {
+               kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
+               goto done;
+       }
+
+       /* Privileged instruction can be executed only in CPL=0 */
+       if ((c->d & Priv) && kvm_x86_ops->get_cpl(ctxt->vcpu)) {
+               kvm_inject_gp(ctxt->vcpu, 0);
+               goto done;
+       }
+
        if (((c->d & ModRM) && (c->modrm_mod != 3)) || (c->d & MemAbs))
                memop = c->modrm_ea;
 
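The two decode-time checks added above, restated on their own (a sketch, not part of the patch; d is the opcode_table/group_table entry and the helper name is illustrative):

static bool insn_allowed(u32 d, bool lock_prefix, int cpl)
{
	if (lock_prefix && !(d & Lock))
		return false;		/* emulator queues #UD */
	if ((d & Priv) && cpl != 0)
		return false;		/* emulator injects #GP(0) */
	return true;
}
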
@@ -1749,7 +1880,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
                                        &c->src.val,
                                        c->src.bytes,
                                        ctxt->vcpu);
-               if (rc != 0)
+               if (rc != X86EMUL_CONTINUE)
                        goto done;
                c->src.orig_val = c->src.val;
        }
@@ -1768,12 +1899,15 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
                        c->dst.ptr = (void *)c->dst.ptr +
                                                   (c->src.val & mask) / 8;
                }
-               if (!(c->d & Mov) &&
-                                  /* optimisation - avoid slow emulated read */
-                   ((rc = ops->read_emulated((unsigned long)c->dst.ptr,
-                                          &c->dst.val,
-                                         c->dst.bytes, ctxt->vcpu)) != 0))
-                       goto done;
+               if (!(c->d & Mov)) {
+                       /* optimisation - avoid slow emulated read */
+                       rc = ops->read_emulated((unsigned long)c->dst.ptr,
+                                               &c->dst.val,
+                                               c->dst.bytes,
+                                               ctxt->vcpu);
+                       if (rc != X86EMUL_CONTINUE)
+                               goto done;
+               }
        }
        c->dst.orig_val = c->dst.val;
 
@@ -1876,7 +2010,12 @@ special_insn:
                break;
        case 0x6c:              /* insb */
        case 0x6d:              /* insw/insd */
-                if (kvm_emulate_pio_string(ctxt->vcpu,
+               if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX],
+                                         (c->d & ByteOp) ? 1 : c->op_bytes)) {
+                       kvm_inject_gp(ctxt->vcpu, 0);
+                       goto done;
+               }
+               if (kvm_emulate_pio_string(ctxt->vcpu,
                                1,
                                (c->d & ByteOp) ? 1 : c->op_bytes,
                                c->rep_prefix ?
@@ -1892,6 +2031,11 @@ special_insn:
                return 0;
        case 0x6e:              /* outsb */
        case 0x6f:              /* outsw/outsd */
+               if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX],
+                                         (c->d & ByteOp) ? 1 : c->op_bytes)) {
+                       kvm_inject_gp(ctxt->vcpu, 0);
+                       goto done;
+               }
                if (kvm_emulate_pio_string(ctxt->vcpu,
                                0,
                                (c->d & ByteOp) ? 1 : c->op_bytes,
@@ -1978,25 +2122,19 @@ special_insn:
                break;
        case 0x8e: { /* mov seg, r/m16 */
                uint16_t sel;
-               int type_bits;
-               int err;
 
                sel = c->src.val;
-               if (c->modrm_reg == VCPU_SREG_SS)
-                       toggle_interruptibility(ctxt, X86_SHADOW_INT_MOV_SS);
 
-               if (c->modrm_reg <= 5) {
-                       type_bits = (c->modrm_reg == 1) ? 9 : 1;
-                       err = kvm_load_segment_descriptor(ctxt->vcpu, sel,
-                                                         type_bits, c->modrm_reg);
-               } else {
-                       printk(KERN_INFO "Invalid segreg in modrm byte 0x%02x\n",
-                                       c->modrm);
-                       goto cannot_emulate;
+               if (c->modrm_reg == VCPU_SREG_CS ||
+                   c->modrm_reg > VCPU_SREG_GS) {
+                       kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
+                       goto done;
                }
 
-               if (err < 0)
-                       goto cannot_emulate;
+               if (c->modrm_reg == VCPU_SREG_SS)
+                       toggle_interruptibility(ctxt, X86_SHADOW_INT_MOV_SS);
+
+               rc = kvm_load_segment_descriptor(ctxt->vcpu, sel, c->modrm_reg);
 
                c->dst.type = OP_NONE;  /* Disable writeback. */
                break;
@@ -2025,7 +2163,10 @@ special_insn:
                c->dst.type = OP_REG;
                c->dst.ptr = (unsigned long *) &ctxt->eflags;
                c->dst.bytes = c->op_bytes;
-               goto pop_instruction;
+               rc = emulate_popf(ctxt, ops, &c->dst.val, c->op_bytes);
+               if (rc != X86EMUL_CONTINUE)
+                       goto done;
+               break;
        case 0xa0 ... 0xa1:     /* mov */
                c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
                c->dst.val = c->src.val;
@@ -2039,11 +2180,12 @@ special_insn:
                c->dst.ptr = (unsigned long *)register_address(c,
                                                   es_base(ctxt),
                                                   c->regs[VCPU_REGS_RDI]);
-               if ((rc = ops->read_emulated(register_address(c,
-                                          seg_override_base(ctxt, c),
-                                       c->regs[VCPU_REGS_RSI]),
+               rc = ops->read_emulated(register_address(c,
+                                               seg_override_base(ctxt, c),
+                                               c->regs[VCPU_REGS_RSI]),
                                        &c->dst.val,
-                                       c->dst.bytes, ctxt->vcpu)) != 0)
+                                       c->dst.bytes, ctxt->vcpu);
+               if (rc != X86EMUL_CONTINUE)
                        goto done;
                register_address_increment(c, &c->regs[VCPU_REGS_RSI],
                                       (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
@@ -2058,10 +2200,11 @@ special_insn:
                c->src.ptr = (unsigned long *)register_address(c,
                                       seg_override_base(ctxt, c),
                                                   c->regs[VCPU_REGS_RSI]);
-               if ((rc = ops->read_emulated((unsigned long)c->src.ptr,
-                                               &c->src.val,
-                                               c->src.bytes,
-                                               ctxt->vcpu)) != 0)
+               rc = ops->read_emulated((unsigned long)c->src.ptr,
+                                       &c->src.val,
+                                       c->src.bytes,
+                                       ctxt->vcpu);
+               if (rc != X86EMUL_CONTINUE)
                        goto done;
 
                c->dst.type = OP_NONE; /* Disable writeback. */
@@ -2069,10 +2212,11 @@ special_insn:
                c->dst.ptr = (unsigned long *)register_address(c,
                                                   es_base(ctxt),
                                                   c->regs[VCPU_REGS_RDI]);
-               if ((rc = ops->read_emulated((unsigned long)c->dst.ptr,
-                                               &c->dst.val,
-                                               c->dst.bytes,
-                                               ctxt->vcpu)) != 0)
+               rc = ops->read_emulated((unsigned long)c->dst.ptr,
+                                       &c->dst.val,
+                                       c->dst.bytes,
+                                       ctxt->vcpu);
+               if (rc != X86EMUL_CONTINUE)
                        goto done;
 
                DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr);
@@ -2102,12 +2246,13 @@ special_insn:
                c->dst.type = OP_REG;
                c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
                c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
-               if ((rc = ops->read_emulated(register_address(c,
-                                                seg_override_base(ctxt, c),
-                                                c->regs[VCPU_REGS_RSI]),
-                                                &c->dst.val,
-                                                c->dst.bytes,
-                                                ctxt->vcpu)) != 0)
+               rc = ops->read_emulated(register_address(c,
+                                               seg_override_base(ctxt, c),
+                                               c->regs[VCPU_REGS_RSI]),
+                                       &c->dst.val,
+                                       c->dst.bytes,
+                                       ctxt->vcpu);
+               if (rc != X86EMUL_CONTINUE)
                        goto done;
                register_address_increment(c, &c->regs[VCPU_REGS_RSI],
                                       (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
@@ -2163,11 +2308,9 @@ special_insn:
        case 0xe9: /* jmp rel */
                goto jmp;
        case 0xea: /* jmp far */
-               if (kvm_load_segment_descriptor(ctxt->vcpu, c->src2.val, 9,
-                                       VCPU_SREG_CS) < 0) {
-                       DPRINTF("jmp far: Failed to load CS descriptor\n");
-                       goto cannot_emulate;
-               }
+               if (kvm_load_segment_descriptor(ctxt->vcpu, c->src2.val,
+                                               VCPU_SREG_CS))
+                       goto done;
 
                c->eip = c->src.val;
                break;
@@ -2185,7 +2328,13 @@ special_insn:
        case 0xef: /* out (e/r)ax,dx */
                port = c->regs[VCPU_REGS_RDX];
                io_dir_in = 0;
-       do_io:  if (kvm_emulate_pio(ctxt->vcpu, io_dir_in,
+       do_io:
+               if (!emulator_io_permited(ctxt, ops, port,
+                                         (c->d & ByteOp) ? 1 : c->op_bytes)) {
+                       kvm_inject_gp(ctxt->vcpu, 0);
+                       goto done;
+               }
+               if (kvm_emulate_pio(ctxt->vcpu, io_dir_in,
                                   (c->d & ByteOp) ? 1 : c->op_bytes,
                                   port) != 0) {
                        c->eip = saved_eip;
@@ -2210,13 +2359,21 @@ special_insn:
                c->dst.type = OP_NONE;  /* Disable writeback. */
                break;
        case 0xfa: /* cli */
-               ctxt->eflags &= ~X86_EFLAGS_IF;
-               c->dst.type = OP_NONE;  /* Disable writeback. */
+               if (emulator_bad_iopl(ctxt))
+                       kvm_inject_gp(ctxt->vcpu, 0);
+               else {
+                       ctxt->eflags &= ~X86_EFLAGS_IF;
+                       c->dst.type = OP_NONE;  /* Disable writeback. */
+               }
                break;
        case 0xfb: /* sti */
-               toggle_interruptibility(ctxt, X86_SHADOW_INT_STI);
-               ctxt->eflags |= X86_EFLAGS_IF;
-               c->dst.type = OP_NONE;  /* Disable writeback. */
+               if (emulator_bad_iopl(ctxt))
+                       kvm_inject_gp(ctxt->vcpu, 0);
+               else {
+                       toggle_interruptibility(ctxt, X86_SHADOW_INT_STI);
+                       ctxt->eflags |= X86_EFLAGS_IF;
+                       c->dst.type = OP_NONE;  /* Disable writeback. */
+               }
                break;
        case 0xfc: /* cld */
                ctxt->eflags &= ~EFLG_DF;
@@ -2319,8 +2476,9 @@ twobyte_insn:
                }
                break;
        case 0x05:              /* syscall */
-               if (emulate_syscall(ctxt) == -1)
-                       goto cannot_emulate;
+               rc = emulate_syscall(ctxt);
+               if (rc != X86EMUL_CONTINUE)
+                       goto done;
                else
                        goto writeback;
                break;
@@ -2391,14 +2549,16 @@ twobyte_insn:
                c->dst.type = OP_NONE;
                break;
        case 0x34:              /* sysenter */
-               if (emulate_sysenter(ctxt) == -1)
-                       goto cannot_emulate;
+               rc = emulate_sysenter(ctxt);
+               if (rc != X86EMUL_CONTINUE)
+                       goto done;
                else
                        goto writeback;
                break;
        case 0x35:              /* sysexit */
-               if (emulate_sysexit(ctxt) == -1)
-                       goto cannot_emulate;
+               rc = emulate_sysexit(ctxt);
+               if (rc != X86EMUL_CONTINUE)
+                       goto done;
                else
                        goto writeback;
                break;
index 15578f180e596bee481f10451ec375b6ceb37f26..294698b6daff60c288dcd1304470ab7c6254000f 100644 (file)
@@ -242,11 +242,11 @@ static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
 {
        struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state,
                                                 irq_ack_notifier);
-       spin_lock(&ps->inject_lock);
+       raw_spin_lock(&ps->inject_lock);
        if (atomic_dec_return(&ps->pit_timer.pending) < 0)
                atomic_inc(&ps->pit_timer.pending);
        ps->irq_ack = 1;
-       spin_unlock(&ps->inject_lock);
+       raw_spin_unlock(&ps->inject_lock);
 }
 
 void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
@@ -605,7 +605,7 @@ static const struct kvm_io_device_ops speaker_dev_ops = {
        .write    = speaker_ioport_write,
 };
 
-/* Caller must have writers lock on slots_lock */
+/* Caller must hold slots_lock */
 struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
 {
        struct kvm_pit *pit;
@@ -624,7 +624,7 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
 
        mutex_init(&pit->pit_state.lock);
        mutex_lock(&pit->pit_state.lock);
-       spin_lock_init(&pit->pit_state.inject_lock);
+       raw_spin_lock_init(&pit->pit_state.inject_lock);
 
        kvm->arch.vpit = pit;
        pit->kvm = kvm;
@@ -645,13 +645,13 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
        kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
 
        kvm_iodevice_init(&pit->dev, &pit_dev_ops);
-       ret = __kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev);
+       ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &pit->dev);
        if (ret < 0)
                goto fail;
 
        if (flags & KVM_PIT_SPEAKER_DUMMY) {
                kvm_iodevice_init(&pit->speaker_dev, &speaker_dev_ops);
-               ret = __kvm_io_bus_register_dev(&kvm->pio_bus,
+               ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS,
                                                &pit->speaker_dev);
                if (ret < 0)
                        goto fail_unregister;
@@ -660,11 +660,12 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
        return pit;
 
 fail_unregister:
-       __kvm_io_bus_unregister_dev(&kvm->pio_bus, &pit->dev);
+       kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->dev);
 
 fail:
-       if (pit->irq_source_id >= 0)
-               kvm_free_irq_source_id(kvm, pit->irq_source_id);
+       kvm_unregister_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
+       kvm_unregister_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier);
+       kvm_free_irq_source_id(kvm, pit->irq_source_id);
 
        kfree(pit);
        return NULL;
@@ -723,12 +724,12 @@ void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
                /* Try to inject pending interrupts when
                 * last one has been acked.
                 */
-               spin_lock(&ps->inject_lock);
+               raw_spin_lock(&ps->inject_lock);
                if (atomic_read(&ps->pit_timer.pending) && ps->irq_ack) {
                        ps->irq_ack = 0;
                        inject = 1;
                }
-               spin_unlock(&ps->inject_lock);
+               raw_spin_unlock(&ps->inject_lock);
                if (inject)
                        __inject_pit_timer_intr(kvm);
        }
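For context (a sketch, not part of the patch): unlike spinlock_t, a raw_spinlock_t never becomes a sleeping lock under PREEMPT_RT, which is why the PIT lock taken from the interrupt-ack and injection paths above (and the PIC lock below) is converted to the pattern sketched here.

static DEFINE_RAW_SPINLOCK(example_lock);

static void example_ack_path(void)
{
	raw_spin_lock(&example_lock);
	/* update state that the injection path also examines */
	raw_spin_unlock(&example_lock);
}
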
index d4c1c7ffdc099ac1e3153bf22724b5b2095d307e..900d6b0ba7c2347ed4fb487a92dc3a2d30c83a9f 100644 (file)
@@ -27,7 +27,7 @@ struct kvm_kpit_state {
        u32    speaker_data_on;
        struct mutex lock;
        struct kvm_pit *pit;
-       spinlock_t inject_lock;
+       raw_spinlock_t inject_lock;
        unsigned long irq_ack;
        struct kvm_irq_ack_notifier irq_ack_notifier;
 };
index d057c0cbd2457d1b665cdfa9fefd71cdbc726d22..07771da85de55a75db97f4c7522ad465ce386b26 100644 (file)
@@ -44,18 +44,19 @@ static void pic_clear_isr(struct kvm_kpic_state *s, int irq)
         * Other interrupt may be delivered to PIC while lock is dropped but
         * it should be safe since PIC state is already updated at this stage.
         */
-       spin_unlock(&s->pics_state->lock);
+       raw_spin_unlock(&s->pics_state->lock);
        kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq);
-       spin_lock(&s->pics_state->lock);
+       raw_spin_lock(&s->pics_state->lock);
 }
 
 void kvm_pic_clear_isr_ack(struct kvm *kvm)
 {
        struct kvm_pic *s = pic_irqchip(kvm);
-       spin_lock(&s->lock);
+
+       raw_spin_lock(&s->lock);
        s->pics[0].isr_ack = 0xff;
        s->pics[1].isr_ack = 0xff;
-       spin_unlock(&s->lock);
+       raw_spin_unlock(&s->lock);
 }
 
 /*
@@ -156,9 +157,9 @@ static void pic_update_irq(struct kvm_pic *s)
 
 void kvm_pic_update_irq(struct kvm_pic *s)
 {
-       spin_lock(&s->lock);
+       raw_spin_lock(&s->lock);
        pic_update_irq(s);
-       spin_unlock(&s->lock);
+       raw_spin_unlock(&s->lock);
 }
 
 int kvm_pic_set_irq(void *opaque, int irq, int level)
@@ -166,14 +167,14 @@ int kvm_pic_set_irq(void *opaque, int irq, int level)
        struct kvm_pic *s = opaque;
        int ret = -1;
 
-       spin_lock(&s->lock);
+       raw_spin_lock(&s->lock);
        if (irq >= 0 && irq < PIC_NUM_PINS) {
                ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
                pic_update_irq(s);
                trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr,
                                      s->pics[irq >> 3].imr, ret == 0);
        }
-       spin_unlock(&s->lock);
+       raw_spin_unlock(&s->lock);
 
        return ret;
 }
@@ -203,7 +204,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
        int irq, irq2, intno;
        struct kvm_pic *s = pic_irqchip(kvm);
 
-       spin_lock(&s->lock);
+       raw_spin_lock(&s->lock);
        irq = pic_get_irq(&s->pics[0]);
        if (irq >= 0) {
                pic_intack(&s->pics[0], irq);
@@ -228,7 +229,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
                intno = s->pics[0].irq_base + irq;
        }
        pic_update_irq(s);
-       spin_unlock(&s->lock);
+       raw_spin_unlock(&s->lock);
 
        return intno;
 }
@@ -442,7 +443,7 @@ static int picdev_write(struct kvm_io_device *this,
                        printk(KERN_ERR "PIC: non byte write\n");
                return 0;
        }
-       spin_lock(&s->lock);
+       raw_spin_lock(&s->lock);
        switch (addr) {
        case 0x20:
        case 0x21:
@@ -455,7 +456,7 @@ static int picdev_write(struct kvm_io_device *this,
                elcr_ioport_write(&s->pics[addr & 1], addr, data);
                break;
        }
-       spin_unlock(&s->lock);
+       raw_spin_unlock(&s->lock);
        return 0;
 }
 
@@ -472,7 +473,7 @@ static int picdev_read(struct kvm_io_device *this,
                        printk(KERN_ERR "PIC: non byte read\n");
                return 0;
        }
-       spin_lock(&s->lock);
+       raw_spin_lock(&s->lock);
        switch (addr) {
        case 0x20:
        case 0x21:
@@ -486,7 +487,7 @@ static int picdev_read(struct kvm_io_device *this,
                break;
        }
        *(unsigned char *)val = data;
-       spin_unlock(&s->lock);
+       raw_spin_unlock(&s->lock);
        return 0;
 }
 
@@ -520,7 +521,7 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
        s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL);
        if (!s)
                return NULL;
-       spin_lock_init(&s->lock);
+       raw_spin_lock_init(&s->lock);
        s->kvm = kvm;
        s->pics[0].elcr_mask = 0xf8;
        s->pics[1].elcr_mask = 0xde;
@@ -533,7 +534,9 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
         * Initialize PIO device
         */
        kvm_iodevice_init(&s->dev, &picdev_ops);
-       ret = kvm_io_bus_register_dev(kvm, &kvm->pio_bus, &s->dev);
+       mutex_lock(&kvm->slots_lock);
+       ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &s->dev);
+       mutex_unlock(&kvm->slots_lock);
        if (ret < 0) {
                kfree(s);
                return NULL;
@@ -541,3 +544,14 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
 
        return s;
 }
+
+void kvm_destroy_pic(struct kvm *kvm)
+{
+       struct kvm_pic *vpic = kvm->arch.vpic;
+
+       if (vpic) {
+               kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev);
+               kvm->arch.vpic = NULL;
+               kfree(vpic);
+       }
+}
index be399e207d57943ac74a6967e3dd21776b262dee..34b15915754d80f459088a1696c9d16f79c3bd5b 100644 (file)
@@ -62,7 +62,7 @@ struct kvm_kpic_state {
 };
 
 struct kvm_pic {
-       spinlock_t lock;
+       raw_spinlock_t lock;
        unsigned pending_acks;
        struct kvm *kvm;
        struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */
@@ -75,6 +75,7 @@ struct kvm_pic {
 };
 
 struct kvm_pic *kvm_create_pic(struct kvm *kvm);
+void kvm_destroy_pic(struct kvm *kvm);
 int kvm_pic_read_irq(struct kvm *kvm);
 void kvm_pic_update_irq(struct kvm_pic *s);
 void kvm_pic_clear_isr_ack(struct kvm *kvm);
index 7bcc5b6a4403f6b3bcc816f8279c48c6ebb52ce8..cff851cf5322f59db69fc2b69d3ea306480e96d6 100644 (file)
@@ -1,6 +1,11 @@
 #ifndef ASM_KVM_CACHE_REGS_H
 #define ASM_KVM_CACHE_REGS_H
 
+#define KVM_POSSIBLE_CR0_GUEST_BITS X86_CR0_TS
+#define KVM_POSSIBLE_CR4_GUEST_BITS                              \
+       (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR  \
+        | X86_CR4_OSXMMEXCPT | X86_CR4_PGE)
+
 static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu,
                                              enum kvm_reg reg)
 {
@@ -38,4 +43,30 @@ static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index)
        return vcpu->arch.pdptrs[index];
 }
 
+static inline ulong kvm_read_cr0_bits(struct kvm_vcpu *vcpu, ulong mask)
+{
+       ulong tmask = mask & KVM_POSSIBLE_CR0_GUEST_BITS;
+       if (tmask & vcpu->arch.cr0_guest_owned_bits)
+               kvm_x86_ops->decache_cr0_guest_bits(vcpu);
+       return vcpu->arch.cr0 & mask;
+}
+
+static inline ulong kvm_read_cr0(struct kvm_vcpu *vcpu)
+{
+       return kvm_read_cr0_bits(vcpu, ~0UL);
+}
+
+static inline ulong kvm_read_cr4_bits(struct kvm_vcpu *vcpu, ulong mask)
+{
+       ulong tmask = mask & KVM_POSSIBLE_CR4_GUEST_BITS;
+       if (tmask & vcpu->arch.cr4_guest_owned_bits)
+               kvm_x86_ops->decache_cr4_guest_bits(vcpu);
+       return vcpu->arch.cr4 & mask;
+}
+
+static inline ulong kvm_read_cr4(struct kvm_vcpu *vcpu)
+{
+       return kvm_read_cr4_bits(vcpu, ~0UL);
+}
+
 #endif
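A usage sketch (not part of the patch, with an illustrative helper name): callers ask kvm_read_cr0_bits()/kvm_read_cr4_bits() only for the bits they need, so a guest-owned bit such as CR0.TS is decached from hardware only when actually requested; the mmu.c hunks below use the same pattern for CR0.WP.

static bool guest_cr0_ts_set(struct kvm_vcpu *vcpu)
{
	/* decaches CR0.TS from the VMCS/VMCB only if the guest owns it */
	return kvm_read_cr0_bits(vcpu, X86_CR0_TS) != 0;
}
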
index ba8c045da7820fceea3286b2a60c1378fe28265d..4b224f90087bd602ce3c74a09392cb7dc5d23f07 100644 (file)
@@ -1246,3 +1246,34 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
 
        return 0;
 }
+
+int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data)
+{
+       struct kvm_lapic *apic = vcpu->arch.apic;
+
+       if (!irqchip_in_kernel(vcpu->kvm))
+               return 1;
+
+       /* if this is ICR write vector before command */
+       if (reg == APIC_ICR)
+               apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
+       return apic_reg_write(apic, reg, (u32)data);
+}
+
+int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
+{
+       struct kvm_lapic *apic = vcpu->arch.apic;
+       u32 low, high = 0;
+
+       if (!irqchip_in_kernel(vcpu->kvm))
+               return 1;
+
+       if (apic_reg_read(apic, reg, 4, &low))
+               return 1;
+       if (reg == APIC_ICR)
+               apic_reg_read(apic, APIC_ICR2, 4, &high);
+
+       *data = (((u64)high) << 32) | low;
+
+       return 0;
+}
index 40010b09c4aa11a2c977e80ac314f5e43f1c8e44..f5fe32c5edadb852841fcfcc60b96758624ed92b 100644 (file)
@@ -48,4 +48,12 @@ void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu);
 
 int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data);
 int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
+
+int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data);
+int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
+
+static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu)
+{
+       return vcpu->arch.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE;
+}
 #endif
index 89a49fb46a275e6eebcf2609f3870696b3a3945b..741373e8ca777a318ed7e487ef8982a181ea24f4 100644 (file)
@@ -18,6 +18,7 @@
  */
 
 #include "mmu.h"
+#include "x86.h"
 #include "kvm_cache_regs.h"
 
 #include <linux/kvm_host.h>
@@ -29,6 +30,7 @@
 #include <linux/swap.h>
 #include <linux/hugetlb.h>
 #include <linux/compiler.h>
+#include <linux/srcu.h>
 
 #include <asm/page.h>
 #include <asm/cmpxchg.h>
@@ -136,16 +138,6 @@ module_param(oos_shadow, bool, 0644);
 #define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK \
                        | PT64_NX_MASK)
 
-#define PFERR_PRESENT_MASK (1U << 0)
-#define PFERR_WRITE_MASK (1U << 1)
-#define PFERR_USER_MASK (1U << 2)
-#define PFERR_RSVD_MASK (1U << 3)
-#define PFERR_FETCH_MASK (1U << 4)
-
-#define PT_PDPE_LEVEL 3
-#define PT_DIRECTORY_LEVEL 2
-#define PT_PAGE_TABLE_LEVEL 1
-
 #define RMAP_EXT 4
 
 #define ACC_EXEC_MASK    1
@@ -153,6 +145,9 @@ module_param(oos_shadow, bool, 0644);
 #define ACC_USER_MASK    PT_USER_MASK
 #define ACC_ALL          (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)
 
+#include <trace/events/kvm.h>
+
+#undef TRACE_INCLUDE_FILE
 #define CREATE_TRACE_POINTS
 #include "mmutrace.h"
 
@@ -229,7 +224,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
 
 static int is_write_protection(struct kvm_vcpu *vcpu)
 {
-       return vcpu->arch.cr0 & X86_CR0_WP;
+       return kvm_read_cr0_bits(vcpu, X86_CR0_WP);
 }
 
 static int is_cpuid_PSE36(void)
@@ -239,7 +234,7 @@ static int is_cpuid_PSE36(void)
 
 static int is_nx(struct kvm_vcpu *vcpu)
 {
-       return vcpu->arch.shadow_efer & EFER_NX;
+       return vcpu->arch.efer & EFER_NX;
 }
 
 static int is_shadow_present_pte(u64 pte)
@@ -253,7 +248,7 @@ static int is_large_pte(u64 pte)
        return pte & PT_PAGE_SIZE_MASK;
 }
 
-static int is_writeble_pte(unsigned long pte)
+static int is_writable_pte(unsigned long pte)
 {
        return pte & PT_WRITABLE_MASK;
 }
@@ -470,24 +465,10 @@ static int has_wrprotected_page(struct kvm *kvm,
 
 static int host_mapping_level(struct kvm *kvm, gfn_t gfn)
 {
-       unsigned long page_size = PAGE_SIZE;
-       struct vm_area_struct *vma;
-       unsigned long addr;
+       unsigned long page_size;
        int i, ret = 0;
 
-       addr = gfn_to_hva(kvm, gfn);
-       if (kvm_is_error_hva(addr))
-               return PT_PAGE_TABLE_LEVEL;
-
-       down_read(&current->mm->mmap_sem);
-       vma = find_vma(current->mm, addr);
-       if (!vma)
-               goto out;
-
-       page_size = vma_kernel_pagesize(vma);
-
-out:
-       up_read(&current->mm->mmap_sem);
+       page_size = kvm_host_page_size(kvm, gfn);
 
        for (i = PT_PAGE_TABLE_LEVEL;
             i < (PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES); ++i) {
@@ -503,8 +484,7 @@ out:
 static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn)
 {
        struct kvm_memory_slot *slot;
-       int host_level;
-       int level = PT_PAGE_TABLE_LEVEL;
+       int host_level, level, max_level;
 
        slot = gfn_to_memslot(vcpu->kvm, large_gfn);
        if (slot && slot->dirty_bitmap)
@@ -515,7 +495,10 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn)
        if (host_level == PT_PAGE_TABLE_LEVEL)
                return host_level;
 
-       for (level = PT_DIRECTORY_LEVEL; level <= host_level; ++level)
+       max_level = kvm_x86_ops->get_lpage_level() < host_level ?
+               kvm_x86_ops->get_lpage_level() : host_level;
+
+       for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level)
                if (has_wrprotected_page(vcpu->kvm, large_gfn, level))
                        break;
 
@@ -633,7 +616,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
        pfn = spte_to_pfn(*spte);
        if (*spte & shadow_accessed_mask)
                kvm_set_pfn_accessed(pfn);
-       if (is_writeble_pte(*spte))
+       if (is_writable_pte(*spte))
                kvm_set_pfn_dirty(pfn);
        rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level);
        if (!*rmapp) {
@@ -662,6 +645,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
                        prev_desc = desc;
                        desc = desc->more;
                }
+               pr_err("rmap_remove: %p %llx many->many\n", spte, *spte);
                BUG();
        }
 }
@@ -708,7 +692,7 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn)
                BUG_ON(!spte);
                BUG_ON(!(*spte & PT_PRESENT_MASK));
                rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
-               if (is_writeble_pte(*spte)) {
+               if (is_writable_pte(*spte)) {
                        __set_spte(spte, *spte & ~PT_WRITABLE_MASK);
                        write_protected = 1;
                }
@@ -732,7 +716,7 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn)
                        BUG_ON(!(*spte & PT_PRESENT_MASK));
                        BUG_ON((*spte & (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)) != (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK));
                        pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn);
-                       if (is_writeble_pte(*spte)) {
+                       if (is_writable_pte(*spte)) {
                                rmap_remove(kvm, spte);
                                --kvm->stat.lpages;
                                __set_spte(spte, shadow_trap_nonpresent_pte);
@@ -787,7 +771,7 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
 
                        new_spte &= ~PT_WRITABLE_MASK;
                        new_spte &= ~SPTE_HOST_WRITEABLE;
-                       if (is_writeble_pte(*spte))
+                       if (is_writable_pte(*spte))
                                kvm_set_pfn_dirty(spte_to_pfn(*spte));
                        __set_spte(spte, new_spte);
                        spte = rmap_next(kvm, rmapp, spte);
@@ -805,35 +789,32 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
                                         unsigned long data))
 {
        int i, j;
+       int ret;
        int retval = 0;
+       struct kvm_memslots *slots;
 
-       /*
-        * If mmap_sem isn't taken, we can look the memslots with only
-        * the mmu_lock by skipping over the slots with userspace_addr == 0.
-        */
-       for (i = 0; i < kvm->nmemslots; i++) {
-               struct kvm_memory_slot *memslot = &kvm->memslots[i];
+       slots = rcu_dereference(kvm->memslots);
+
+       for (i = 0; i < slots->nmemslots; i++) {
+               struct kvm_memory_slot *memslot = &slots->memslots[i];
                unsigned long start = memslot->userspace_addr;
                unsigned long end;
 
-               /* mmu_lock protects userspace_addr */
-               if (!start)
-                       continue;
-
                end = start + (memslot->npages << PAGE_SHIFT);
                if (hva >= start && hva < end) {
                        gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
 
-                       retval |= handler(kvm, &memslot->rmap[gfn_offset],
-                                         data);
+                       ret = handler(kvm, &memslot->rmap[gfn_offset], data);
 
                        for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) {
                                int idx = gfn_offset;
                                idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j);
-                               retval |= handler(kvm,
+                               ret |= handler(kvm,
                                        &memslot->lpage_info[j][idx].rmap_pde,
                                        data);
                        }
+                       trace_kvm_age_page(hva, memslot, ret);
+                       retval |= ret;
                }
        }
 
@@ -856,9 +837,15 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
        u64 *spte;
        int young = 0;
 
-       /* always return old for EPT */
+       /*
+        * Emulate the accessed bit for EPT, by checking if this page has
+        * an EPT mapping, and clearing it if it does. On the next access,
+        * a new EPT mapping will be established.
+        * This has some overhead, but not as much as the cost of swapping
+        * out actively used pages or breaking up actively used hugepages.
+        */
        if (!shadow_accessed_mask)
-               return 0;
+               return kvm_unmap_rmapp(kvm, rmapp, data);
 
        spte = rmap_next(kvm, rmapp, NULL);
        while (spte) {
@@ -1615,7 +1602,7 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
 
 static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn)
 {
-       int slot = memslot_id(kvm, gfn_to_memslot(kvm, gfn));
+       int slot = memslot_id(kvm, gfn);
        struct kvm_mmu_page *sp = page_header(__pa(pte));
 
        __set_bit(slot, sp->slot_bitmap);
@@ -1639,7 +1626,7 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
 {
        struct page *page;
 
-       gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
+       gpa_t gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL);
 
        if (gpa == UNMAPPED_GVA)
                return NULL;
@@ -1852,7 +1839,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
                 * is responsibility of mmu_get_page / kvm_sync_page.
                 * Same reasoning can be applied to dirty page accounting.
                 */
-               if (!can_unsync && is_writeble_pte(*sptep))
+               if (!can_unsync && is_writable_pte(*sptep))
                        goto set_pte;
 
                if (mmu_need_write_protect(vcpu, gfn, can_unsync)) {
@@ -1860,7 +1847,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
                                 __func__, gfn);
                        ret = 1;
                        pte_access &= ~ACC_WRITE_MASK;
-                       if (is_writeble_pte(spte))
+                       if (is_writable_pte(spte))
                                spte &= ~PT_WRITABLE_MASK;
                }
        }
@@ -1881,7 +1868,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
                         bool reset_host_protection)
 {
        int was_rmapped = 0;
-       int was_writeble = is_writeble_pte(*sptep);
+       int was_writable = is_writable_pte(*sptep);
        int rmap_count;
 
        pgprintk("%s: spte %llx access %x write_fault %d"
@@ -1932,7 +1919,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
                if (rmap_count > RMAP_RECYCLE_THRESHOLD)
                        rmap_recycle(vcpu, sptep, gfn);
        } else {
-               if (was_writeble)
+               if (was_writable)
                        kvm_release_pfn_dirty(pfn);
                else
                        kvm_release_pfn_clean(pfn);
@@ -2162,8 +2149,11 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
        spin_unlock(&vcpu->kvm->mmu_lock);
 }
 
-static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr)
+static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr,
+                                 u32 access, u32 *error)
 {
+       if (error)
+               *error = 0;
        return vaddr;
 }
 
@@ -2747,7 +2737,7 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
        if (tdp_enabled)
                return 0;
 
-       gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
+       gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL);
 
        spin_lock(&vcpu->kvm->mmu_lock);
        r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
@@ -2847,16 +2837,13 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
         */
        page = alloc_page(GFP_KERNEL | __GFP_DMA32);
        if (!page)
-               goto error_1;
+               return -ENOMEM;
+
        vcpu->arch.mmu.pae_root = page_address(page);
        for (i = 0; i < 4; ++i)
                vcpu->arch.mmu.pae_root[i] = INVALID_PAGE;
 
        return 0;
-
-error_1:
-       free_mmu_pages(vcpu);
-       return -ENOMEM;
 }
 
 int kvm_mmu_create(struct kvm_vcpu *vcpu)
@@ -2936,10 +2923,9 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask)
        spin_lock(&kvm_lock);
 
        list_for_each_entry(kvm, &vm_list, vm_list) {
-               int npages;
+               int npages, idx;
 
-               if (!down_read_trylock(&kvm->slots_lock))
-                       continue;
+               idx = srcu_read_lock(&kvm->srcu);
                spin_lock(&kvm->mmu_lock);
                npages = kvm->arch.n_alloc_mmu_pages -
                         kvm->arch.n_free_mmu_pages;
@@ -2952,7 +2938,7 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask)
                nr_to_scan--;
 
                spin_unlock(&kvm->mmu_lock);
-               up_read(&kvm->slots_lock);
+               srcu_read_unlock(&kvm->srcu, idx);
        }
        if (kvm_freed)
                list_move_tail(&kvm_freed->vm_list, &vm_list);
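Note the SRCU idiom replacing the rwsem: srcu_read_lock() returns an index that must be passed back to srcu_read_unlock(), because the read side joins whichever per-epoch counter is currently active and must release that same counter. The toy model below (single-threaded, non-atomic, emphatically not the kernel's SRCU implementation) only illustrates why that index exists.

#include <assert.h>
#include <stdio.h>

static long readers[2];		/* one reader count per epoch */
static int  active;		/* which epoch new readers join */

static int toy_srcu_read_lock(void)
{
	int idx = active;	/* remember the epoch we joined */

	readers[idx]++;
	return idx;
}

static void toy_srcu_read_unlock(int idx)
{
	readers[idx]--;		/* must drop the same epoch's counter */
}

static void toy_synchronize_srcu(void)
{
	int old = active;

	active ^= 1;		/* later readers land in the other epoch */
	/* A real implementation blocks here until the old epoch drains. */
	assert(readers[old] == 0);
}

int main(void)
{
	int idx = toy_srcu_read_lock();

	printf("read side entered epoch %d\n", idx);
	toy_srcu_read_unlock(idx);
	toy_synchronize_srcu();
	printf("grace period complete\n");
	return 0;
}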
@@ -3019,9 +3005,11 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
        int i;
        unsigned int nr_mmu_pages;
        unsigned int  nr_pages = 0;
+       struct kvm_memslots *slots;
 
-       for (i = 0; i < kvm->nmemslots; i++)
-               nr_pages += kvm->memslots[i].npages;
+       slots = rcu_dereference(kvm->memslots);
+       for (i = 0; i < slots->nmemslots; i++)
+               nr_pages += slots->memslots[i].npages;
 
        nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000;
        nr_mmu_pages = max(nr_mmu_pages,
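The pattern here is to load kvm->memslots once through rcu_dereference() and then walk only that snapshot, so a concurrent memslot update that publishes a new array cannot pull the structure out from under the loop. Below is a rough single-process illustration with C11 atomics; struct toy_slots and the helpers are invented for the example, and a real RCU user would also wait for a grace period before freeing the old array.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct toy_slots {
	int nmemslots;
	unsigned long npages[8];
};

static _Atomic(struct toy_slots *) current_slots;

/* Writer side: publish a fully built array with release semantics. */
static void toy_publish(struct toy_slots *new_slots)
{
	atomic_store_explicit(&current_slots, new_slots, memory_order_release);
}

/* Reader side: take one acquire-load snapshot and use only that pointer. */
static unsigned long toy_count_pages(void)
{
	struct toy_slots *slots =
		atomic_load_explicit(&current_slots, memory_order_acquire);
	unsigned long nr = 0;

	for (int i = 0; i < slots->nmemslots; i++)
		nr += slots->npages[i];
	return nr;
}

int main(void)
{
	struct toy_slots *s = calloc(1, sizeof(*s));

	s->nmemslots = 2;
	s->npages[0] = 256;
	s->npages[1] = 512;
	toy_publish(s);
	printf("guest pages in snapshot: %lu\n", toy_count_pages());
	free(s);
	return 0;
}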
@@ -3246,7 +3234,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
                if (is_shadow_present_pte(ent) && !is_last_spte(ent, level))
                        audit_mappings_page(vcpu, ent, va, level - 1);
                else {
-                       gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, va);
+                       gpa_t gpa = kvm_mmu_gva_to_gpa_read(vcpu, va, NULL);
                        gfn_t gfn = gpa >> PAGE_SHIFT;
                        pfn_t pfn = gfn_to_pfn(vcpu->kvm, gfn);
                        hpa_t hpa = (hpa_t)pfn << PAGE_SHIFT;
@@ -3291,10 +3279,12 @@ static void audit_mappings(struct kvm_vcpu *vcpu)
 static int count_rmaps(struct kvm_vcpu *vcpu)
 {
+       struct kvm *kvm = vcpu->kvm;
+       struct kvm_memslots *slots;
        int nmaps = 0;
-       int i, j, k;
+       int i, j, k, idx;
 
+       idx = srcu_read_lock(&kvm->srcu);
+       slots = rcu_dereference(kvm->memslots);
        for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
-               struct kvm_memory_slot *m = &vcpu->kvm->memslots[i];
+               struct kvm_memory_slot *m = &slots->memslots[i];
                struct kvm_rmap_desc *d;
 
                for (j = 0; j < m->npages; ++j) {
@@ -3317,6 +3307,7 @@ static int count_rmaps(struct kvm_vcpu *vcpu)
                        }
                }
        }
+       srcu_read_unlock(&kvm->srcu, idx);
        return nmaps;
 }
 
index 61a1b3884b4954b1173cc5c21aced330acb2c938..be66759321a546aeee68a37cdc84c179ef39266e 100644 (file)
@@ -2,6 +2,7 @@
 #define __KVM_X86_MMU_H
 
 #include <linux/kvm_host.h>
+#include "kvm_cache_regs.h"
 
 #define PT64_PT_BITS 9
 #define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS)
 #define PT32_ROOT_LEVEL 2
 #define PT32E_ROOT_LEVEL 3
 
+#define PT_PDPE_LEVEL 3
+#define PT_DIRECTORY_LEVEL 2
+#define PT_PAGE_TABLE_LEVEL 1
+
+#define PFERR_PRESENT_MASK (1U << 0)
+#define PFERR_WRITE_MASK (1U << 1)
+#define PFERR_USER_MASK (1U << 2)
+#define PFERR_RSVD_MASK (1U << 3)
+#define PFERR_FETCH_MASK (1U << 4)
+
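These PFERR_* bits follow the architectural x86 page-fault error code: bit 0 present, bit 1 write, bit 2 user, bit 3 reserved-bit violation, bit 4 instruction fetch. A quick standalone illustration of composing and decoding such a code follows; describe_fault() is made up for the example.

#include <stdio.h>

#define PFERR_PRESENT_MASK (1U << 0)
#define PFERR_WRITE_MASK   (1U << 1)
#define PFERR_USER_MASK    (1U << 2)
#define PFERR_RSVD_MASK    (1U << 3)
#define PFERR_FETCH_MASK   (1U << 4)

static void describe_fault(unsigned int ec)
{
	printf("%#x: %s-mode %s, page %s%s\n", ec,
	       (ec & PFERR_USER_MASK)  ? "user" : "supervisor",
	       (ec & PFERR_FETCH_MASK) ? "instruction fetch" :
	       (ec & PFERR_WRITE_MASK) ? "write" : "read",
	       (ec & PFERR_PRESENT_MASK) ? "present" : "not present",
	       (ec & PFERR_RSVD_MASK) ? " (reserved bit set)" : "");
}

int main(void)
{
	/* user-mode write to a present page, e.g. a write-protect fault */
	describe_fault(PFERR_PRESENT_MASK | PFERR_WRITE_MASK | PFERR_USER_MASK);
	/* supervisor instruction fetch from a non-present page */
	describe_fault(PFERR_FETCH_MASK);
	return 0;
}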
 int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]);
 
 static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
@@ -53,30 +64,6 @@ static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
        return kvm_mmu_load(vcpu);
 }
 
-static inline int is_long_mode(struct kvm_vcpu *vcpu)
-{
-#ifdef CONFIG_X86_64
-       return vcpu->arch.shadow_efer & EFER_LMA;
-#else
-       return 0;
-#endif
-}
-
-static inline int is_pae(struct kvm_vcpu *vcpu)
-{
-       return vcpu->arch.cr4 & X86_CR4_PAE;
-}
-
-static inline int is_pse(struct kvm_vcpu *vcpu)
-{
-       return vcpu->arch.cr4 & X86_CR4_PSE;
-}
-
-static inline int is_paging(struct kvm_vcpu *vcpu)
-{
-       return vcpu->arch.cr0 & X86_CR0_PG;
-}
-
 static inline int is_present_gpte(unsigned long pte)
 {
        return pte & PT_PRESENT_MASK;
index ede2131a9225eb00530aafb62962e383ab2a7101..81eab9a50e6afdbe07c8d264e7db6dedcdc5daed 100644 (file)
@@ -162,7 +162,7 @@ walk:
                if (rsvd_fault)
                        goto access_error;
 
-               if (write_fault && !is_writeble_pte(pte))
+               if (write_fault && !is_writable_pte(pte))
                        if (user_fault || is_write_protection(vcpu))
                                goto access_error;
 
@@ -490,18 +490,23 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
        spin_unlock(&vcpu->kvm->mmu_lock);
 }
 
-static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
+static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access,
+                              u32 *error)
 {
        struct guest_walker walker;
        gpa_t gpa = UNMAPPED_GVA;
        int r;
 
-       r = FNAME(walk_addr)(&walker, vcpu, vaddr, 0, 0, 0);
+       r = FNAME(walk_addr)(&walker, vcpu, vaddr,
+                            !!(access & PFERR_WRITE_MASK),
+                            !!(access & PFERR_USER_MASK),
+                            !!(access & PFERR_FETCH_MASK));
 
        if (r) {
                gpa = gfn_to_gpa(walker.gfn);
                gpa |= vaddr & ~PAGE_MASK;
-       }
+       } else if (error)
+               *error = walker.error_code;
 
        return gpa;
 }
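On a successful walk the guest physical address is rebuilt from the resolved gfn plus the page offset of the original virtual address, and the access mask is unpacked with !! into the separate write/user/fetch flags the walker takes. A small arithmetic sketch of both steps; PAGE_SHIFT is the usual x86 4KB value and everything else is illustrative.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_MASK  (~(((uint64_t)1 << PAGE_SHIFT) - 1))

#define PFERR_WRITE_MASK (1U << 1)
#define PFERR_USER_MASK  (1U << 2)
#define PFERR_FETCH_MASK (1U << 4)

int main(void)
{
	uint64_t vaddr  = 0x00007f1234567abcULL;   /* guest-virtual address */
	uint64_t gfn    = 0x1a2b;                  /* frame the walk resolved to */
	uint32_t access = PFERR_WRITE_MASK | PFERR_USER_MASK;

	/* gfn_to_gpa() is just a shift; the low 12 bits come from vaddr */
	uint64_t gpa = (gfn << PAGE_SHIFT) | (vaddr & ~PAGE_MASK);

	/* !! normalises each masked bit to 0 or 1 */
	int write_fault = !!(access & PFERR_WRITE_MASK);
	int user_fault  = !!(access & PFERR_USER_MASK);
	int fetch_fault = !!(access & PFERR_FETCH_MASK);

	printf("gpa=%#llx write=%d user=%d fetch=%d\n",
	       (unsigned long long)gpa, write_fault, user_fault, fetch_fault);
	return 0;
}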
index 1d9b33843c80ef521dc059697285cfed06cfd7d7..52f78dd03010569eba75d6db749d83e4c19265e9 100644 (file)
@@ -231,7 +231,7 @@ static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
                efer &= ~EFER_LME;
 
        to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
-       vcpu->arch.shadow_efer = efer;
+       vcpu->arch.efer = efer;
 }
 
 static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
@@ -540,6 +540,8 @@ static void init_vmcb(struct vcpu_svm *svm)
        struct vmcb_control_area *control = &svm->vmcb->control;
        struct vmcb_save_area *save = &svm->vmcb->save;
 
+       svm->vcpu.fpu_active = 1;
+
        control->intercept_cr_read =    INTERCEPT_CR0_MASK |
                                        INTERCEPT_CR3_MASK |
                                        INTERCEPT_CR4_MASK;
@@ -552,13 +554,19 @@ static void init_vmcb(struct vcpu_svm *svm)
        control->intercept_dr_read =    INTERCEPT_DR0_MASK |
                                        INTERCEPT_DR1_MASK |
                                        INTERCEPT_DR2_MASK |
-                                       INTERCEPT_DR3_MASK;
+                                       INTERCEPT_DR3_MASK |
+                                       INTERCEPT_DR4_MASK |
+                                       INTERCEPT_DR5_MASK |
+                                       INTERCEPT_DR6_MASK |
+                                       INTERCEPT_DR7_MASK;
 
        control->intercept_dr_write =   INTERCEPT_DR0_MASK |
                                        INTERCEPT_DR1_MASK |
                                        INTERCEPT_DR2_MASK |
                                        INTERCEPT_DR3_MASK |
+                                       INTERCEPT_DR4_MASK |
                                        INTERCEPT_DR5_MASK |
+                                       INTERCEPT_DR6_MASK |
                                        INTERCEPT_DR7_MASK;
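The hunk widens the intercepted debug registers from DR0-DR3 to DR0-DR7 in both the read and write masks. Assuming the per-register intercept bits are consecutive single bits (the macro below is a stand-in rather than the kernel's definition), the same sets can be built programmatically:

#include <stdio.h>

/* toy stand-in: bit n intercepts DRn */
#define TOY_INTERCEPT_DR(n) (1u << (n))

static unsigned int toy_dr_intercepts(int first, int last)
{
	unsigned int mask = 0;

	for (int n = first; n <= last; n++)
		mask |= TOY_INTERCEPT_DR(n);
	return mask;
}

int main(void)
{
	printf("DR0-DR3 only: %#x\n", toy_dr_intercepts(0, 3));   /* 0xf  */
	printf("DR0-DR7:      %#x\n", toy_dr_intercepts(0, 7));   /* 0xff */
	return 0;
}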
 
        control->intercept_exceptions = (1 << PF_VECTOR) |
@@ -569,6 +577,7 @@ static void init_vmcb(struct vcpu_svm *svm)
        control->intercept =    (1ULL << INTERCEPT_INTR) |
                                (1ULL << INTERCEPT_NMI) |
                                (1ULL << INTERCEPT_SMI) |
+                               (1ULL << INTERCEPT_SELECTIVE_CR0) |
                                (1ULL << INTERCEPT_CPUID) |
                                (1ULL << INTERCEPT_INVD) |
                                (1ULL << INTERCEPT_HLT) |
@@ -641,10 +650,8 @@ static void init_vmcb(struct vcpu_svm *svm)
                control->intercept &= ~((1ULL << INTERCEPT_TASK_SWITCH) |
                                        (1ULL << INTERCEPT_INVLPG));
                control->intercept_exceptions &= ~(1 << PF_VECTOR);
-               control->intercept_cr_read &= ~(INTERCEPT_CR0_MASK|
-                                               INTERCEPT_CR3_MASK);
-               control->intercept_cr_write &= ~(INTERCEPT_CR0_MASK|
-                                                INTERCEPT_CR3_MASK);
+               control->intercept_cr_read &= ~INTERCEPT_CR3_MASK;
+               control->intercept_cr_write &= ~INTERCEPT_CR3_MASK;
                save->g_pat = 0x0007040600070406ULL;
                save->cr3 = 0;
                save->cr4 = 0;
@@ -730,7 +737,6 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
        init_vmcb(svm);
 
        fx_init(&svm->vcpu);
-       svm->vcpu.fpu_active = 1;
        svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
        if (kvm_vcpu_is_bsp(&svm->vcpu))
                svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
@@ -765,14 +771,16 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
        if (unlikely(cpu != vcpu->cpu)) {
                u64 delta;
 
-               /*
-                * Make sure that the guest sees a monotonically
-                * increasing TSC.
-                */
-               delta = vcpu->arch.host_tsc - native_read_tsc();
-               svm->vmcb->control.tsc_offset += delta;
-               if (is_nested(svm))
-                       svm->nested.hsave->control.tsc_offset += delta;
+               if (check_tsc_unstable()) {
+                       /*
+                        * Make sure that the guest sees a monotonically
+                        * increasing TSC.
+                        */
+                       delta = vcpu->arch.host_tsc - native_read_tsc();
+                       svm->vmcb->control.tsc_offset += delta;
+                       if (is_nested(svm))
+                               svm->nested.hsave->control.tsc_offset += delta;
+               }
                vcpu->cpu = cpu;
                kvm_migrate_timers(vcpu);
                svm->asid_generation = 0;
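The guest-visible counter is host_tsc + tsc_offset, so when a vCPU migrates to a CPU whose TSC reads lower than the one it left, adding delta = old_host_tsc - new_host_tsc to the offset keeps the guest view from going backwards; with a stable, synchronized host TSC (the check_tsc_unstable() guard above) no fixup is needed. A toy calculation of that bookkeeping, with made-up numbers:

#include <stdint.h>
#include <stdio.h>

/* guest view of the counter, as in the VMCB: host TSC plus an offset */
static uint64_t guest_tsc(uint64_t host_tsc, uint64_t tsc_offset)
{
	return host_tsc + tsc_offset;
}

int main(void)
{
	uint64_t offset   = 0;
	uint64_t old_host = 1000000;	/* TSC on the CPU being left */
	uint64_t new_host = 400000;	/* TSC on the destination CPU (behind) */

	printf("no fixup:   %llu -> %llu (goes backwards)\n",
	       (unsigned long long)guest_tsc(old_host, offset),
	       (unsigned long long)guest_tsc(new_host, offset));

	offset += old_host - new_host;	/* same delta the hunk above applies */

	printf("with fixup: %llu -> %llu (monotonic)\n",
	       (unsigned long long)guest_tsc(old_host, 0),
	       (unsigned long long)guest_tsc(new_host, offset));
	return 0;
}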
@@ -954,42 +962,59 @@ static void svm_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
        svm->vmcb->save.gdtr.base = dt->base ;
 }
 
+static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
+{
+}
+
 static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
 {
 }
 
+static void update_cr0_intercept(struct vcpu_svm *svm)
+{
+       ulong gcr0 = svm->vcpu.arch.cr0;
+       u64 *hcr0 = &svm->vmcb->save.cr0;
+
+       if (!svm->vcpu.fpu_active)
+               *hcr0 |= SVM_CR0_SELECTIVE_MASK;
+       else
+               *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
+                       | (gcr0 & SVM_CR0_SELECTIVE_MASK);
+
+
+       if (gcr0 == *hcr0 && svm->vcpu.fpu_active) {
+               svm->vmcb->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK;
+               svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK;
+       } else {
+               svm->vmcb->control.intercept_cr_read |= INTERCEPT_CR0_MASK;
+               svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR0_MASK;
+       }
+}
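update_cr0_intercept() keeps trapping CR0 accesses unless the guest's CR0 and the value actually loaded into the VMCB agree and the guest currently owns the FPU; only then is it safe to let the guest touch CR0 directly. A pure-function restatement of that condition follows; the mask value and names below are simplified stand-ins for the SVM definitions, not copies of them.

#include <stdbool.h>
#include <stdio.h>

/* TS | MP as a stand-in; the real SVM_CR0_SELECTIVE_MASK lives in svm.h */
#define TOY_CR0_SELECTIVE_MASK 0x0000000aUL

/* Returns true if CR0 reads/writes still need to be intercepted. */
static bool toy_need_cr0_intercept(unsigned long guest_cr0,
				   unsigned long *host_cr0, bool fpu_active)
{
	if (!fpu_active)
		*host_cr0 |= TOY_CR0_SELECTIVE_MASK;	/* hide the guest's TS/MP */
	else
		*host_cr0 = (*host_cr0 & ~TOY_CR0_SELECTIVE_MASK) |
			    (guest_cr0 & TOY_CR0_SELECTIVE_MASK);

	/* intercept unless both views match and the guest owns the FPU */
	return !(guest_cr0 == *host_cr0 && fpu_active);
}

int main(void)
{
	unsigned long hcr0 = 0x80000011UL;	/* PG | ET | PE */

	printf("fpu active, views agree -> intercept=%d\n",
	       toy_need_cr0_intercept(0x80000011UL, &hcr0, true));
	printf("fpu lazily switched off -> intercept=%d\n",
	       toy_need_cr0_intercept(0x80000011UL, &hcr0, false));
	return 0;
}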
+
 static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
 
 #ifdef CONFIG_X86_64
-       if (vcpu->arch.shadow_efer & EFER_LME) {
+       if (vcpu->arch.efer & EFER_LME) {
                if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
-                       vcpu->arch.shadow_efer |= EFER_LMA;
+                       vcpu->arch.efer |= EFER_LMA;
                        svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
                }
 
                if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) {
-                       vcpu->arch.shadow_efer &= ~EFER_LMA;
+                       vcpu->arch.efer &= ~EFER_LMA;
                        svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME);
                }
        }
 #endif
-       if (npt_enabled)
-               goto set;
+       vcpu->arch.cr0 = cr0;
 
-       if ((vcpu->arch.cr0 & X86_CR0_TS) && !(cr0 & X86_CR0_TS)) {
-               svm->vmcb-