* 'kvm-updates/2.6.34' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (145 commits)
KVM: x86: Add KVM_CAP_X86_ROBUST_SINGLESTEP
KVM: VMX: Update instruction length on intercepted BP
KVM: Fix emulate_sys[call, enter, exit]()'s fault handling
KVM: Fix segment descriptor loading
KVM: Fix load_guest_segment_descriptor() to inject page fault
KVM: x86 emulator: Forbid modifying CS segment register by mov instruction
KVM: Convert kvm->requests_lock to raw_spinlock_t
KVM: Convert i8254/i8259 locks to raw_spinlocks
KVM: x86 emulator: disallow opcode 82 in 64-bit mode
KVM: x86 emulator: code style cleanup
KVM: Plan obsolescence of kernel allocated slots, paravirt mmu
KVM: x86 emulator: Add LOCK prefix validity checking
KVM: x86 emulator: Check CPL level during privilege instruction emulation
KVM: x86 emulator: Fix popf emulation
KVM: x86 emulator: Check IOPL level during io instruction emulation
KVM: x86 emulator: fix memory access during x86 emulation
KVM: x86 emulator: Add Virtual-8086 mode of emulation
KVM: x86 emulator: Add group9 instruction decoding
KVM: x86 emulator: Add group8 instruction decoding
KVM: do not store wqh in irqfd
...
Trivial conflicts in Documentation/feature-removal-schedule.txt
NCCI TTY device nodes. User space (pppdcapiplugin) works without
noticing the difference.
Who: Jan Kiszka <jan.kiszka@web.de>
+
+----------------------------
+
+What: KVM memory aliases support
+When: July 2010
+Why: Memory aliasing support is used for speeding up guest vga access
+ through the vga windows.
+
+ Modern userspace no longer uses this feature, so it's just bitrotted
+ code and can be removed with no impact.
+Who: Avi Kivity <avi@redhat.com>
+
+----------------------------
+
+What: KVM kernel-allocated memory slots
+When: July 2010
+Why: Since 2.6.25, kvm supports user-allocated memory slots, which are
+ much more flexible than kernel-allocated slots. All current userspace
+ supports the newer interface and this code can be removed with no
+ impact.
+Who: Avi Kivity <avi@redhat.com>
+
+----------------------------
+
+What: KVM paravirt mmu host support
+When: January 2011
+Why: The paravirt mmu host support is slower than non-paravirt mmu, both
+ on newer and older hardware. It is already not exposed to the guest,
+ and kept only for live migration purposes.
+Who: Avi Kivity <avi@redhat.com>
+
+----------------------------
Only run vcpu ioctls from the same thread that was used to create the
vcpu.
-2. File descritpors
+2. File descriptors
The kvm API is centered around file descriptors. An initial
open("/dev/kvm") obtains a handle to the kvm subsystem; this handle
can be used to issue system ioctls. A KVM_CREATE_VM ioctl on this
-handle will create a VM file descripror which can be used to issue VM
+handle will create a VM file descriptor which can be used to issue VM
ioctls. A KVM_CREATE_VCPU ioctl on a VM fd will create a virtual cpu
and return a file descriptor pointing to it. Finally, ioctls on a vcpu
fd can be used to control the vcpu, including the important task of
actually running guest code.
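
As an illustration of this layering, a minimal userspace sketch (error
handling and the actual run loop omitted; all ioctls used here are part of
the documented API, nothing beyond what the text above describes):

	#include <fcntl.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	int main(void)
	{
		int kvm, vm, vcpu;

		kvm = open("/dev/kvm", O_RDWR);		/* system fd */
		if (ioctl(kvm, KVM_GET_API_VERSION, 0) != KVM_API_VERSION)
			return 1;

		vm = ioctl(kvm, KVM_CREATE_VM, 0);	/* VM fd: memory, irqchip, ... */
		vcpu = ioctl(vm, KVM_CREATE_VCPU, 0);	/* vcpu fd: regs, KVM_RUN, ... */

		/* ... set up memory and registers, then ioctl(vcpu, KVM_RUN, 0) ... */
		return 0;
	}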
Parameters: struct kvm_clock_data (in)
Returns: 0 on success, -1 on error
-Sets the current timestamp of kvmclock to the valued specific in its parameter.
+Sets the current timestamp of kvmclock to the value specified in its parameter.
In conjunction with KVM_GET_CLOCK, it is used to ensure monotonicity on scenarios
such as migration.
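
A sketch of how a migration tool might use the two ioctls together
(src_vm_fd and dst_vm_fd are hypothetical VM file descriptors on the source
and destination hosts; error handling omitted):

	struct kvm_clock_data data;

	ioctl(src_vm_fd, KVM_GET_CLOCK, &data);	/* read kvmclock on the source */
	/* ... carry 'data' along with the rest of the migration stream ... */
	ioctl(dst_vm_fd, KVM_SET_CLOCK, &data);	/* restore it, keeping the guest clock monotonic */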
__u64 data_offset; /* relative to kvm_run start */
} io;
-If exit_reason is KVM_EXIT_IO_IN or KVM_EXIT_IO_OUT, then the vcpu has
+If exit_reason is KVM_EXIT_IO, then the vcpu has
executed a port I/O instruction which could not be satisfied by kvm.
data_offset describes where the data is located (KVM_EXIT_IO_OUT) or
where kvm expects application code to place the data for the next
-KVM_RUN invocation (KVM_EXIT_IO_IN). Data format is a patcked array.
+KVM_RUN invocation (KVM_EXIT_IO_IN). Data format is a packed array.
struct {
struct kvm_debug_exit_arch arch;
__u8 is_write;
} mmio;
-If exit_reason is KVM_EXIT_MMIO or KVM_EXIT_IO_OUT, then the vcpu has
+If exit_reason is KVM_EXIT_MMIO, then the vcpu has
executed a memory-mapped I/O instruction which could not be satisfied
by kvm. The 'data' member contains the written data if 'is_write' is
true, and should be filled by application code otherwise.
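
To make the exit dispatch concrete, a sketch of the userspace side after
KVM_RUN returns ('run' is the mmap()ed struct kvm_run; vcpu_fd, handle_pio()
and handle_mmio() are hypothetical names, not part of the API):

	ioctl(vcpu_fd, KVM_RUN, 0);

	switch (run->exit_reason) {
	case KVM_EXIT_IO: {
		/* the I/O data lives inside the kvm_run mapping itself */
		void *data = (char *)run + run->io.data_offset;

		/* in: fill 'data' before the next KVM_RUN; out: consume it */
		handle_pio(run->io.direction, run->io.port, run->io.size,
			   run->io.count, data);
		break;
	}
	case KVM_EXIT_MMIO:
		/* is_write: data[] holds what the guest wrote; otherwise fill it */
		handle_mmio(run->mmio.phys_addr, run->mmio.data,
			    run->mmio.len, run->mmio.is_write);
		break;
	}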
F: arch/x86/kvm/svm.c
KERNEL VIRTUAL MACHINE (KVM) FOR POWERPC
-M: Hollis Blanchard <hollisb@us.ibm.com>
+M: Alexander Graf <agraf@suse.de>
L: kvm-ppc@vger.kernel.org
W: http://kvm.qumranet.com
S: Supported
select ANON_INODES
select HAVE_KVM_IRQCHIP
select KVM_APIC_ARCHITECTURE
+ select KVM_MMIO
---help---
Support hosting fully virtualized guest machines using hardware
virtualization extensions. You will need a fairly recent
return 0;
mmio:
if (p->dir)
- r = kvm_io_bus_read(&vcpu->kvm->mmio_bus, p->addr,
+ r = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, p->addr,
p->size, &p->data);
else
- r = kvm_io_bus_write(&vcpu->kvm->mmio_bus, p->addr,
+ r = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, p->addr,
p->size, &p->data);
if (r)
printk(KERN_ERR"kvm: No iodevice found! addr:%lx\n", p->addr);
static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
union context *host_ctx, *guest_ctx;
- int r;
+ int r, idx;
- /*
- * down_read() may sleep and return with interrupts enabled
- */
- down_read(&vcpu->kvm->slots_lock);
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
again:
if (signal_pending(current)) {
if (r < 0)
goto vcpu_run_fail;
- up_read(&vcpu->kvm->slots_lock);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
kvm_guest_enter();
/*
kvm_guest_exit();
preempt_enable();
- down_read(&vcpu->kvm->slots_lock);
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
r = kvm_handle_exit(kvm_run, vcpu);
}
out:
- up_read(&vcpu->kvm->slots_lock);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
if (r > 0) {
kvm_resched(vcpu);
- down_read(&vcpu->kvm->slots_lock);
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
goto again;
}
goto out;
r = kvm_setup_default_irq_routing(kvm);
if (r) {
- kfree(kvm->arch.vioapic);
+ kvm_ioapic_destroy(kvm);
goto out;
}
break;
static void kvm_release_vm_pages(struct kvm *kvm)
{
+ struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
int i, j;
unsigned long base_gfn;
- for (i = 0; i < kvm->nmemslots; i++) {
- memslot = &kvm->memslots[i];
+ slots = rcu_dereference(kvm->memslots);
+ for (i = 0; i < slots->nmemslots; i++) {
+ memslot = &slots->memslots[i];
base_gfn = memslot->base_gfn;
for (j = 0; j < memslot->npages; j++) {
kfree(kvm->arch.vioapic);
kvm_release_vm_pages(kvm);
kvm_free_physmem(kvm);
+ cleanup_srcu_struct(&kvm->srcu);
free_kvm(kvm);
}
return r;
}
-int kvm_arch_set_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem,
+int kvm_arch_prepare_memory_region(struct kvm *kvm,
+ struct kvm_memory_slot *memslot,
struct kvm_memory_slot old,
+ struct kvm_userspace_memory_region *mem,
int user_alloc)
{
unsigned long i;
unsigned long pfn;
- int npages = mem->memory_size >> PAGE_SHIFT;
- struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot];
+ int npages = memslot->npages;
unsigned long base_gfn = memslot->base_gfn;
if (base_gfn + npages > (KVM_MAX_MEM_SIZE >> PAGE_SHIFT))
return 0;
}
+void kvm_arch_commit_memory_region(struct kvm *kvm,
+ struct kvm_userspace_memory_region *mem,
+ struct kvm_memory_slot old,
+ int user_alloc)
+{
+ return;
+}
+
void kvm_arch_flush_shadow(struct kvm *kvm)
{
kvm_flush_remote_tlbs(kvm);
if (log->slot >= KVM_MEMORY_SLOTS)
goto out;
- memslot = &kvm->memslots[log->slot];
+ memslot = &kvm->memslots->memslots[log->slot];
r = -ENOENT;
if (!memslot->dirty_bitmap)
goto out;
struct kvm_memory_slot *memslot;
int is_dirty = 0;
+ mutex_lock(&kvm->slots_lock);
spin_lock(&kvm->arch.dirty_log_lock);
r = kvm_ia64_sync_dirty_log(kvm, log);
/* If nothing is dirty, don't bother messing with page tables. */
if (is_dirty) {
kvm_flush_remote_tlbs(kvm);
- memslot = &kvm->memslots[log->slot];
+ memslot = &kvm->memslots->memslots[log->slot];
n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
memset(memslot->dirty_bitmap, 0, n);
}
r = 0;
out:
+ mutex_unlock(&kvm->slots_lock);
spin_unlock(&kvm->arch.dirty_log_lock);
return r;
}
struct exit_ctl_data *p;
p = kvm_get_exit_data(vcpu);
- if (p && p->exit_reason == EXIT_REASON_PAL_CALL) {
+ if (p->exit_reason == EXIT_REASON_PAL_CALL) {
p->u.pal_data.ret = result;
return ;
}
struct exit_ctl_data *p;
p = kvm_get_exit_data(vcpu);
- if (p && p->exit_reason == EXIT_REASON_SAL_CALL) {
+ if (p->exit_reason == EXIT_REASON_SAL_CALL) {
p->u.sal_data.ret = result;
return ;
}
struct exit_ctl_data *p;
p = kvm_get_exit_data(vcpu);
- if (p && (p->exit_reason == EXIT_REASON_PAL_CALL))
+ if (p->exit_reason == EXIT_REASON_PAL_CALL)
index = p->u.pal_data.gr28;
return index;
p = kvm_get_exit_data(vcpu);
- if (p) {
- if (p->exit_reason == EXIT_REASON_SAL_CALL) {
- *in0 = p->u.sal_data.in0;
- *in1 = p->u.sal_data.in1;
- *in2 = p->u.sal_data.in2;
- *in3 = p->u.sal_data.in3;
- *in4 = p->u.sal_data.in4;
- *in5 = p->u.sal_data.in5;
- *in6 = p->u.sal_data.in6;
- *in7 = p->u.sal_data.in7;
- return ;
- }
+ if (p->exit_reason == EXIT_REASON_SAL_CALL) {
+ *in0 = p->u.sal_data.in0;
+ *in1 = p->u.sal_data.in1;
+ *in2 = p->u.sal_data.in2;
+ *in3 = p->u.sal_data.in3;
+ *in4 = p->u.sal_data.in4;
+ *in5 = p->u.sal_data.in5;
+ *in6 = p->u.sal_data.in6;
+ *in7 = p->u.sal_data.in7;
+ return ;
}
*in0 = 0;
}
return;
} else {
inst_type = -1;
- panic_vm(vcpu, "Unsupported MMIO access instruction! \
- Bunld[0]=0x%lx, Bundle[1]=0x%lx\n",
+ panic_vm(vcpu, "Unsupported MMIO access instruction! "
+ "Bunld[0]=0x%lx, Bundle[1]=0x%lx\n",
bundle.i64[0], bundle.i64[1]);
}
* Otherwise panic
*/
if (val & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM))
- panic_vm(vcpu, "Only support guests with vpsr.pk =0 \
- & vpsr.is=0\n");
+ panic_vm(vcpu, "Only support guests with vpsr.pk =0 "
+ "& vpsr.is=0\n");
/*
* For those IA64_PSR bits: id/da/dd/ss/ed/ia
#define RESUME_HOST RESUME_FLAG_HOST
#define RESUME_HOST_NV (RESUME_FLAG_HOST|RESUME_FLAG_NV)
+#define KVM_GUEST_MODE_NONE 0
+#define KVM_GUEST_MODE_GUEST 1
+#define KVM_GUEST_MODE_SKIP 2
+
+#define KVM_INST_FETCH_FAILED -1
+
#endif /* __POWERPC_KVM_ASM_H__ */
#include <linux/types.h>
#include <linux/kvm_host.h>
-#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s_64_asm.h>
struct kvmppc_slb {
u64 esid;
bool Ks;
bool Kp;
bool nx;
- bool large;
+ bool large; /* PTEs are 16MB */
+ bool tb; /* 1TB segment */
bool class;
};
struct kvmppc_vcpu_book3s {
struct kvm_vcpu vcpu;
+ struct kvmppc_book3s_shadow_vcpu shadow_vcpu;
struct kvmppc_sid_map sid_map[SID_MAP_NUM];
struct kvmppc_slb slb[64];
struct {
u64 vsid_next;
u64 vsid_max;
int context_id;
+ ulong prog_flags; /* flags to inject when giving a 700 trap */
};
#define CONTEXT_HOST 0
extern u32 kvmppc_trampoline_lowmem;
extern u32 kvmppc_trampoline_enter;
+extern void kvmppc_rmcall(ulong srr0, ulong srr1);
+extern void kvmppc_load_up_fpu(void);
+extern void kvmppc_load_up_altivec(void);
+extern void kvmppc_load_up_vsx(void);
static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu)
{
#ifndef __ASM_KVM_BOOK3S_ASM_H__
#define __ASM_KVM_BOOK3S_ASM_H__
+#ifdef __ASSEMBLY__
+
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
#include <asm/kvm_asm.h>
#endif /* CONFIG_KVM_BOOK3S_64_HANDLER */
+#else /*__ASSEMBLY__ */
+
+struct kvmppc_book3s_shadow_vcpu {
+ ulong gpr[14];
+ u32 cr;
+ u32 xer;
+ ulong host_r1;
+ ulong host_r2;
+ ulong handler;
+ ulong scratch0;
+ ulong scratch1;
+ ulong vmhandler;
+};
+
+#endif /*__ASSEMBLY__ */
+
#endif /* __ASM_KVM_BOOK3S_ASM_H__ */
u32 mas5;
u32 mas6;
u32 mas7;
+ u32 l1csr0;
u32 l1csr1;
u32 hid0;
u32 hid1;
+ u32 tlb0cfg;
+ u32 tlb1cfg;
struct kvm_vcpu vcpu;
};
ulong trampoline_lowmem;
ulong trampoline_enter;
ulong highmem_handler;
+ ulong rmcall;
ulong host_paca_phys;
struct kvmppc_mmu mmu;
#endif
- u64 fpr[32];
ulong gpr[32];
+ u64 fpr[32];
+ u32 fpscr;
+
+#ifdef CONFIG_ALTIVEC
+ vector128 vr[32];
+ vector128 vscr;
+#endif
+
+#ifdef CONFIG_VSX
+ u64 vsr[32];
+#endif
+
ulong pc;
- u32 cr;
ulong ctr;
ulong lr;
+
+#ifdef CONFIG_BOOKE
ulong xer;
+ u32 cr;
+#endif
ulong msr;
#ifdef CONFIG_PPC64
ulong shadow_msr;
+ ulong shadow_srr1;
ulong hflags;
+ ulong guest_owned_ext;
#endif
u32 mmucr;
ulong sprg0;
#endif
ulong fault_dear;
ulong fault_esr;
+ ulong queued_dear;
+ ulong queued_esr;
gpa_t paddr_accessed;
u8 io_gpr; /* GPR used as IO source/target */
#include <linux/types.h>
#include <linux/kvm_types.h>
#include <linux/kvm_host.h>
+#ifdef CONFIG_PPC_BOOK3S
+#include <asm/kvm_book3s.h>
+#endif
enum emulation_result {
EMULATE_DONE, /* no further processing */
extern void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu);
extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu);
-extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags);
extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu);
extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
struct kvm_interrupt *irq);
extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu);
+#ifdef CONFIG_PPC_BOOK3S
+
+/* We assume we're always acting on the current vcpu */
+
+static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
+{
+ if ( num < 14 ) {
+ get_paca()->shadow_vcpu.gpr[num] = val;
+ to_book3s(vcpu)->shadow_vcpu.gpr[num] = val;
+ } else
+ vcpu->arch.gpr[num] = val;
+}
+
+static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
+{
+ if ( num < 14 )
+ return get_paca()->shadow_vcpu.gpr[num];
+ else
+ return vcpu->arch.gpr[num];
+}
+
+static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
+{
+ get_paca()->shadow_vcpu.cr = val;
+ to_book3s(vcpu)->shadow_vcpu.cr = val;
+}
+
+static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
+{
+ return get_paca()->shadow_vcpu.cr;
+}
+
+static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
+{
+ get_paca()->shadow_vcpu.xer = val;
+ to_book3s(vcpu)->shadow_vcpu.xer = val;
+}
+
+static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu)
+{
+ return get_paca()->shadow_vcpu.xer;
+}
+
+#else
+
+static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
+{
+ vcpu->arch.gpr[num] = val;
+}
+
+static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
+{
+ return vcpu->arch.gpr[num];
+}
+
+static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
+{
+ vcpu->arch.cr = val;
+}
+
+static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.cr;
+}
+
+static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
+{
+ vcpu->arch.xer = val;
+}
+
+static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.xer;
+}
+
+#endif
+
#endif /* __POWERPC_KVM_PPC_H__ */
#include <asm/mmu.h>
#include <asm/page.h>
#include <asm/exception-64e.h>
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+#include <asm/kvm_book3s_64_asm.h>
+#endif
register struct paca_struct *local_paca asm("r13");
u64 esid;
u64 vsid;
} kvm_slb[64]; /* guest SLB */
+ /* We use this to store guest state in */
+ struct kvmppc_book3s_shadow_vcpu shadow_vcpu;
u8 kvm_slb_max; /* highest used guest slb entry */
u8 kvm_in_guest; /* are we inside the guest? */
#endif
#define SRR1_WAKEMT 0x00280000 /* mtctrl */
#define SRR1_WAKEDEC 0x00180000 /* Decrementer interrupt */
#define SRR1_WAKETHERM 0x00100000 /* Thermal management interrupt */
+#define SRR1_PROGFPE 0x00100000 /* Floating Point Enabled */
+#define SRR1_PROGPRIV 0x00040000 /* Privileged instruction */
+#define SRR1_PROGTRAP 0x00020000 /* Trap */
+#define SRR1_PROGADDR 0x00010000 /* SRR0 contains subsequent addr */
#define SPRN_HSRR0 0x13A /* Save/Restore Register 0 */
#define SPRN_HSRR1 0x13B /* Save/Restore Register 1 */
DEFINE(PACA_KVM_IN_GUEST, offsetof(struct paca_struct, kvm_in_guest));
DEFINE(PACA_KVM_SLB, offsetof(struct paca_struct, kvm_slb));
DEFINE(PACA_KVM_SLB_MAX, offsetof(struct paca_struct, kvm_slb_max));
+ DEFINE(PACA_KVM_CR, offsetof(struct paca_struct, shadow_vcpu.cr));
+ DEFINE(PACA_KVM_XER, offsetof(struct paca_struct, shadow_vcpu.xer));
+ DEFINE(PACA_KVM_R0, offsetof(struct paca_struct, shadow_vcpu.gpr[0]));
+ DEFINE(PACA_KVM_R1, offsetof(struct paca_struct, shadow_vcpu.gpr[1]));
+ DEFINE(PACA_KVM_R2, offsetof(struct paca_struct, shadow_vcpu.gpr[2]));
+ DEFINE(PACA_KVM_R3, offsetof(struct paca_struct, shadow_vcpu.gpr[3]));
+ DEFINE(PACA_KVM_R4, offsetof(struct paca_struct, shadow_vcpu.gpr[4]));
+ DEFINE(PACA_KVM_R5, offsetof(struct paca_struct, shadow_vcpu.gpr[5]));
+ DEFINE(PACA_KVM_R6, offsetof(struct paca_struct, shadow_vcpu.gpr[6]));
+ DEFINE(PACA_KVM_R7, offsetof(struct paca_struct, shadow_vcpu.gpr[7]));
+ DEFINE(PACA_KVM_R8, offsetof(struct paca_struct, shadow_vcpu.gpr[8]));
+ DEFINE(PACA_KVM_R9, offsetof(struct paca_struct, shadow_vcpu.gpr[9]));
+ DEFINE(PACA_KVM_R10, offsetof(struct paca_struct, shadow_vcpu.gpr[10]));
+ DEFINE(PACA_KVM_R11, offsetof(struct paca_struct, shadow_vcpu.gpr[11]));
+ DEFINE(PACA_KVM_R12, offsetof(struct paca_struct, shadow_vcpu.gpr[12]));
+ DEFINE(PACA_KVM_R13, offsetof(struct paca_struct, shadow_vcpu.gpr[13]));
+ DEFINE(PACA_KVM_HOST_R1, offsetof(struct paca_struct, shadow_vcpu.host_r1));
+ DEFINE(PACA_KVM_HOST_R2, offsetof(struct paca_struct, shadow_vcpu.host_r2));
+ DEFINE(PACA_KVM_VMHANDLER, offsetof(struct paca_struct,
+ shadow_vcpu.vmhandler));
+ DEFINE(PACA_KVM_SCRATCH0, offsetof(struct paca_struct,
+ shadow_vcpu.scratch0));
+ DEFINE(PACA_KVM_SCRATCH1, offsetof(struct paca_struct,
+ shadow_vcpu.scratch1));
#endif
#endif /* CONFIG_PPC64 */
DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr));
- DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
- DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer));
DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr));
DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc));
DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.msr));
DEFINE(VCPU_HOST_R2, offsetof(struct kvm_vcpu, arch.host_r2));
DEFINE(VCPU_HOST_MSR, offsetof(struct kvm_vcpu, arch.host_msr));
DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr));
+ DEFINE(VCPU_SHADOW_SRR1, offsetof(struct kvm_vcpu, arch.shadow_srr1));
DEFINE(VCPU_TRAMPOLINE_LOWMEM, offsetof(struct kvm_vcpu, arch.trampoline_lowmem));
DEFINE(VCPU_TRAMPOLINE_ENTER, offsetof(struct kvm_vcpu, arch.trampoline_enter));
DEFINE(VCPU_HIGHMEM_HANDLER, offsetof(struct kvm_vcpu, arch.highmem_handler));
+ DEFINE(VCPU_RMCALL, offsetof(struct kvm_vcpu, arch.rmcall));
DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags));
-#endif
+#else
+ DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
+ DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer));
+#endif /* CONFIG_PPC64 */
#endif
#ifdef CONFIG_44x
DEFINE(PGD_T_LOG2, PGD_T_LOG2);
#endif /* CONFIG_ALTIVEC */
#ifdef CONFIG_VSX
EXPORT_SYMBOL(giveup_vsx);
+EXPORT_SYMBOL_GPL(__giveup_vsx);
#endif /* CONFIG_VSX */
#ifdef CONFIG_SPE
EXPORT_SYMBOL(giveup_spe);
*/
switch (dcrn) {
case DCRN_CPR0_CONFIG_ADDR:
- vcpu->arch.gpr[rt] = vcpu->arch.cpr0_cfgaddr;
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.cpr0_cfgaddr);
break;
case DCRN_CPR0_CONFIG_DATA:
local_irq_disable();
mtdcr(DCRN_CPR0_CONFIG_ADDR,
vcpu->arch.cpr0_cfgaddr);
- vcpu->arch.gpr[rt] = mfdcr(DCRN_CPR0_CONFIG_DATA);
+ kvmppc_set_gpr(vcpu, rt,
+ mfdcr(DCRN_CPR0_CONFIG_DATA));
local_irq_enable();
break;
default:
/* emulate some access in kernel */
switch (dcrn) {
case DCRN_CPR0_CONFIG_ADDR:
- vcpu->arch.cpr0_cfgaddr = vcpu->arch.gpr[rs];
+ vcpu->arch.cpr0_cfgaddr = kvmppc_get_gpr(vcpu, rs);
break;
default:
run->dcr.dcrn = dcrn;
- run->dcr.data = vcpu->arch.gpr[rs];
+ run->dcr.data = kvmppc_get_gpr(vcpu, rs);
run->dcr.is_write = 1;
vcpu->arch.dcr_needed = 1;
kvmppc_account_exit(vcpu, DCR_EXITS);
switch (sprn) {
case SPRN_PID:
- kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break;
+ kvmppc_set_pid(vcpu, kvmppc_get_gpr(vcpu, rs)); break;
case SPRN_MMUCR:
- vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break;
+ vcpu->arch.mmucr = kvmppc_get_gpr(vcpu, rs); break;
case SPRN_CCR0:
- vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break;
+ vcpu->arch.ccr0 = kvmppc_get_gpr(vcpu, rs); break;
case SPRN_CCR1:
- vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break;
+ vcpu->arch.ccr1 = kvmppc_get_gpr(vcpu, rs); break;
default:
emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs);
}
switch (sprn) {
case SPRN_PID:
- vcpu->arch.gpr[rt] = vcpu->arch.pid; break;
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.pid); break;
case SPRN_MMUCR:
- vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break;
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.mmucr); break;
case SPRN_CCR0:
- vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break;
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.ccr0); break;
case SPRN_CCR1:
- vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break;
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.ccr1); break;
default:
emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt);
}
struct kvmppc_44x_tlbe *tlbe;
unsigned int gtlb_index;
- gtlb_index = vcpu->arch.gpr[ra];
+ gtlb_index = kvmppc_get_gpr(vcpu, ra);
if (gtlb_index > KVM44x_GUEST_TLB_SIZE) {
printk("%s: index %d\n", __func__, gtlb_index);
kvmppc_dump_vcpu(vcpu);
switch (ws) {
case PPC44x_TLB_PAGEID:
tlbe->tid = get_mmucr_stid(vcpu);
- tlbe->word0 = vcpu->arch.gpr[rs];
+ tlbe->word0 = kvmppc_get_gpr(vcpu, rs);
break;
case PPC44x_TLB_XLAT:
- tlbe->word1 = vcpu->arch.gpr[rs];
+ tlbe->word1 = kvmppc_get_gpr(vcpu, rs);
break;
case PPC44x_TLB_ATTRIB:
- tlbe->word2 = vcpu->arch.gpr[rs];
+ tlbe->word2 = kvmppc_get_gpr(vcpu, rs);
break;
default:
unsigned int as = get_mmucr_sts(vcpu);
unsigned int pid = get_mmucr_stid(vcpu);
- ea = vcpu->arch.gpr[rb];
+ ea = kvmppc_get_gpr(vcpu, rb);
if (ra)
- ea += vcpu->arch.gpr[ra];
+ ea += kvmppc_get_gpr(vcpu, ra);
gtlb_index = kvmppc_44x_tlb_index(vcpu, ea, pid, as);
if (rc) {
+ u32 cr = kvmppc_get_cr(vcpu);
+
if (gtlb_index < 0)
- vcpu->arch.cr &= ~0x20000000;
+ kvmppc_set_cr(vcpu, cr & ~0x20000000);
else
- vcpu->arch.cr |= 0x20000000;
+ kvmppc_set_cr(vcpu, cr | 0x20000000);
}
- vcpu->arch.gpr[rt] = gtlb_index;
+ kvmppc_set_gpr(vcpu, rt, gtlb_index);
kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS);
return EMULATE_DONE;
bool
select PREEMPT_NOTIFIERS
select ANON_INODES
+ select KVM_MMIO
config KVM_BOOK3S_64_HANDLER
bool
/* #define EXIT_DEBUG */
/* #define EXIT_DEBUG_SIMPLE */
+/* #define DEBUG_EXT */
-/* Without AGGRESSIVE_DEC we only fire off a DEC interrupt when DEC turns 0.
- * When set, we retrigger a DEC interrupt after that if DEC <= 0.
- * PPC32 Linux runs faster without AGGRESSIVE_DEC, PPC64 Linux requires it. */
-
-/* #define AGGRESSIVE_DEC */
+static void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr);
struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "exits", VCPU_STAT(sum_exits) },
void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
memcpy(get_paca()->kvm_slb, to_book3s(vcpu)->slb_shadow, sizeof(get_paca()->kvm_slb));
+ memcpy(&get_paca()->shadow_vcpu, &to_book3s(vcpu)->shadow_vcpu,
+ sizeof(get_paca()->shadow_vcpu));
get_paca()->kvm_slb_max = to_book3s(vcpu)->slb_shadow_max;
}
void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
{
memcpy(to_book3s(vcpu)->slb_shadow, get_paca()->kvm_slb, sizeof(get_paca()->kvm_slb));
+ memcpy(&to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu,
+ sizeof(get_paca()->shadow_vcpu));
to_book3s(vcpu)->slb_shadow_max = get_paca()->kvm_slb_max;
+
+ kvmppc_giveup_ext(vcpu, MSR_FP);
+ kvmppc_giveup_ext(vcpu, MSR_VEC);
+ kvmppc_giveup_ext(vcpu, MSR_VSX);
}
-#if defined(AGGRESSIVE_DEC) || defined(EXIT_DEBUG)
+#if defined(EXIT_DEBUG)
static u32 kvmppc_get_dec(struct kvm_vcpu *vcpu)
{
u64 jd = mftb() - vcpu->arch.dec_jiffies;
}
#endif
+static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.shadow_msr = vcpu->arch.msr;
+ /* Guest MSR values */
+ vcpu->arch.shadow_msr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE |
+ MSR_BE | MSR_DE;
+ /* Process MSR values */
+ vcpu->arch.shadow_msr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR |
+ MSR_EE;
+ /* External providers the guest reserved */
+ vcpu->arch.shadow_msr |= (vcpu->arch.msr & vcpu->arch.guest_owned_ext);
+ /* 64-bit Process MSR values */
+#ifdef CONFIG_PPC_BOOK3S_64
+ vcpu->arch.shadow_msr |= MSR_ISF | MSR_HV;
+#endif
+}
+
void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
{
ulong old_msr = vcpu->arch.msr;
#ifdef EXIT_DEBUG
printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr);
#endif
+
msr &= to_book3s(vcpu)->msr_mask;
vcpu->arch.msr = msr;
- vcpu->arch.shadow_msr = msr | MSR_USER32;
- vcpu->arch.shadow_msr &= ( MSR_VEC | MSR_VSX | MSR_FP | MSR_FE0 |
- MSR_USER64 | MSR_SE | MSR_BE | MSR_DE |
- MSR_FE1);
+ kvmppc_recalc_shadow_msr(vcpu);
if (msr & (MSR_WE|MSR_POW)) {
if (!vcpu->arch.pending_exceptions) {
vcpu->arch.mmu.reset_msr(vcpu);
}
-void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec)
+static int kvmppc_book3s_vec2irqprio(unsigned int vec)
{
unsigned int prio;
- vcpu->stat.queue_intr++;
switch (vec) {
case 0x100: prio = BOOK3S_IRQPRIO_SYSTEM_RESET; break;
case 0x200: prio = BOOK3S_IRQPRIO_MACHINE_CHECK; break;
default: prio = BOOK3S_IRQPRIO_MAX; break;
}
- set_bit(prio, &vcpu->arch.pending_exceptions);
+ return prio;
+}
+
+static void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
+ unsigned int vec)
+{
+ clear_bit(kvmppc_book3s_vec2irqprio(vec),
+ &vcpu->arch.pending_exceptions);
+}
+
+void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec)
+{
+ vcpu->stat.queue_intr++;
+
+ set_bit(kvmppc_book3s_vec2irqprio(vec),
+ &vcpu->arch.pending_exceptions);
#ifdef EXIT_DEBUG
printk(KERN_INFO "Queueing interrupt %x\n", vec);
#endif
}
-void kvmppc_core_queue_program(struct kvm_vcpu *vcpu)
+void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags)
{
+ to_book3s(vcpu)->prog_flags = flags;
kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_PROGRAM);
}
return test_bit(BOOK3S_INTERRUPT_DECREMENTER >> 7, &vcpu->arch.pending_exceptions);
}
+void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu)
+{
+ kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER);
+}
+
void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
struct kvm_interrupt *irq)
{
{
int deliver = 1;
int vec = 0;
+ ulong flags = 0ULL;
switch (priority) {
case BOOK3S_IRQPRIO_DECREMENTER:
break;
case BOOK3S_IRQPRIO_PROGRAM:
vec = BOOK3S_INTERRUPT_PROGRAM;
+ flags = to_book3s(vcpu)->prog_flags;
break;
case BOOK3S_IRQPRIO_VSX:
vec = BOOK3S_INTERRUPT_VSX;
#endif
if (deliver)
- kvmppc_inject_interrupt(vcpu, vec, 0ULL);
+ kvmppc_inject_interrupt(vcpu, vec, flags);
return deliver;
}
unsigned long *pending = &vcpu->arch.pending_exceptions;
unsigned int priority;
- /* XXX be more clever here - no need to mftb() on every entry */
- /* Issue DEC again if it's still active */
-#ifdef AGGRESSIVE_DEC
- if (vcpu->arch.msr & MSR_EE)
- if (kvmppc_get_dec(vcpu) & 0x80000000)
- kvmppc_core_queue_dec(vcpu);
-#endif
-
#ifdef EXIT_DEBUG
if (vcpu->arch.pending_exceptions)
printk(KERN_EMERG "KVM: Check pending: %lx\n", vcpu->arch.pending_exceptions);
#endif
priority = __ffs(*pending);
while (priority <= (sizeof(unsigned int) * 8)) {
- if (kvmppc_book3s_irqprio_deliver(vcpu, priority)) {
+ if (kvmppc_book3s_irqprio_deliver(vcpu, priority) &&
+ (priority != BOOK3S_IRQPRIO_DECREMENTER)) {
+ /* DEC interrupts get cleared by mtdec */
clear_bit(priority, &vcpu->arch.pending_exceptions);
break;
}
/* Page not found in guest PTE entries */
vcpu->arch.dear = vcpu->arch.fault_dear;
to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr;
- vcpu->arch.msr |= (vcpu->arch.shadow_msr & 0x00000000f8000000ULL);
+ vcpu->arch.msr |= (vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL);
kvmppc_book3s_queue_irqprio(vcpu, vec);
} else if (page_found == -EPERM) {
/* Storage protection */
vcpu->arch.dear = vcpu->arch.fault_dear;
to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr & ~DSISR_NOHPTE;
to_book3s(vcpu)->dsisr |= DSISR_PROTFAULT;
- vcpu->arch.msr |= (vcpu->arch.shadow_msr & 0x00000000f8000000ULL);
+ vcpu->arch.msr |= (vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL);
kvmppc_book3s_queue_irqprio(vcpu, vec);
} else if (page_found == -EINVAL) {
/* Page not found in guest SLB */
r = kvmppc_emulate_mmio(run, vcpu);
if ( r == RESUME_HOST_NV )
r = RESUME_HOST;
- if ( r == RESUME_GUEST_NV )
- r = RESUME_GUEST;
}
return r;
}
+static inline int get_fpr_index(int i)
+{
+#ifdef CONFIG_VSX
+ i *= 2;
+#endif
+ return i;
+}
+
+/* Give up external provider (FPU, Altivec, VSX) */
+static void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
+{
+ struct thread_struct *t = &current->thread;
+ u64 *vcpu_fpr = vcpu->arch.fpr;
+ u64 *vcpu_vsx = vcpu->arch.vsr;
+ u64 *thread_fpr = (u64*)t->fpr;
+ int i;
+
+ if (!(vcpu->arch.guest_owned_ext & msr))
+ return;
+
+#ifdef DEBUG_EXT
+ printk(KERN_INFO "Giving up ext 0x%lx\n", msr);
+#endif
+
+ switch (msr) {
+ case MSR_FP:
+ giveup_fpu(current);
+ for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
+ vcpu_fpr[i] = thread_fpr[get_fpr_index(i)];
+
+ vcpu->arch.fpscr = t->fpscr.val;
+ break;
+ case MSR_VEC:
+#ifdef CONFIG_ALTIVEC
+ giveup_altivec(current);
+ memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr));
+ vcpu->arch.vscr = t->vscr;
+#endif
+ break;
+ case MSR_VSX:
+#ifdef CONFIG_VSX
+ __giveup_vsx(current);
+ for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++)
+ vcpu_vsx[i] = thread_fpr[get_fpr_index(i) + 1];
+#endif
+ break;
+ default:
+ BUG();
+ }
+
+ vcpu->arch.guest_owned_ext &= ~msr;
+ current->thread.regs->msr &= ~msr;
+ kvmppc_recalc_shadow_msr(vcpu);
+}
+
+/* Handle external providers (FPU, Altivec, VSX) */
+static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
+ ulong msr)
+{
+ struct thread_struct *t = &current->thread;
+ u64 *vcpu_fpr = vcpu->arch.fpr;
+ u64 *vcpu_vsx = vcpu->arch.vsr;
+ u64 *thread_fpr = (u64*)t->fpr;
+ int i;
+
+ if (!(vcpu->arch.msr & msr)) {
+ kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+ return RESUME_GUEST;
+ }
+
+#ifdef DEBUG_EXT
+ printk(KERN_INFO "Loading up ext 0x%lx\n", msr);
+#endif
+
+ current->thread.regs->msr |= msr;
+
+ switch (msr) {
+ case MSR_FP:
+ for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
+ thread_fpr[get_fpr_index(i)] = vcpu_fpr[i];
+
+ t->fpscr.val = vcpu->arch.fpscr;
+ t->fpexc_mode = 0;
+ kvmppc_load_up_fpu();
+ break;
+ case MSR_VEC:
+#ifdef CONFIG_ALTIVEC
+ memcpy(t->vr, vcpu->arch.vr, sizeof(vcpu->arch.vr));
+ t->vscr = vcpu->arch.vscr;
+ t->vrsave = -1;
+ kvmppc_load_up_altivec();
+#endif
+ break;
+ case MSR_VSX:
+#ifdef CONFIG_VSX
+ for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++)
+ thread_fpr[get_fpr_index(i) + 1] = vcpu_vsx[i];
+ kvmppc_load_up_vsx();
+#endif
+ break;
+ default:
+ BUG();
+ }
+
+ vcpu->arch.guest_owned_ext |= msr;
+
+ kvmppc_recalc_shadow_msr(vcpu);
+
+ return RESUME_GUEST;
+}
+
int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int exit_nr)
{
case BOOK3S_INTERRUPT_INST_STORAGE:
vcpu->stat.pf_instruc++;
/* only care about PTEG not found errors, but leave NX alone */
- if (vcpu->arch.shadow_msr & 0x40000000) {
+ if (vcpu->arch.shadow_srr1 & 0x40000000) {
r = kvmppc_handle_pagefault(run, vcpu, vcpu->arch.pc, exit_nr);
vcpu->stat.sp_instruc++;
} else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
*/
kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL);
} else {
- vcpu->arch.msr |= (vcpu->arch.shadow_msr & 0x58000000);
+ vcpu->arch.msr |= vcpu->arch.shadow_srr1 & 0x58000000;
kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL);
r = RESUME_GUEST;
case BOOK3S_INTERRUPT_PROGRAM:
{
enum emulation_result er;
+ ulong flags;
+
+ flags = vcpu->arch.shadow_srr1 & 0x1f0000ull;
if (vcpu->arch.msr & MSR_PR) {
#ifdef EXIT_DEBUG
#endif
if ((vcpu->arch.last_inst & 0xff0007ff) !=
(INS_DCBZ & 0xfffffff7)) {
- kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+ kvmppc_core_queue_program(vcpu, flags);
r = RESUME_GUEST;
break;
}
er = kvmppc_emulate_instruction(run, vcpu);
switch (er) {
case EMULATE_DONE:
- r = RESUME_GUEST;
+ r = RESUME_GUEST_NV;
break;
case EMULATE_FAIL:
printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
__func__, vcpu->arch.pc, vcpu->arch.last_inst);
- kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+ kvmppc_core_queue_program(vcpu, flags);
r = RESUME_GUEST;
break;
default:
}
case BOOK3S_INTERRUPT_SYSCALL:
#ifdef EXIT_DEBUG
- printk(KERN_INFO "Syscall Nr %d\n", (int)vcpu->arch.gpr[0]);
+ printk(KERN_INFO "Syscall Nr %d\n", (int)kvmppc_get_gpr(vcpu, 0));
#endif
vcpu->stat.syscall_exits++;
kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
r = RESUME_GUEST;
break;
- case BOOK3S_INTERRUPT_MACHINE_CHECK:
case BOOK3S_INTERRUPT_FP_UNAVAIL:
- case BOOK3S_INTERRUPT_TRACE:
+ r = kvmppc_handle_ext(vcpu, exit_nr, MSR_FP);
+ break;
case BOOK3S_INTERRUPT_ALTIVEC:
+ r = kvmppc_handle_ext(vcpu, exit_nr, MSR_VEC);
+ break;
case BOOK3S_INTERRUPT_VSX:
+ r = kvmppc_handle_ext(vcpu, exit_nr, MSR_VSX);
+ break;
+ case BOOK3S_INTERRUPT_MACHINE_CHECK:
+ case BOOK3S_INTERRUPT_TRACE:
kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
r = RESUME_GUEST;
break;
default:
/* Ugh - bork here! What did we get? */
- printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n", exit_nr, vcpu->arch.pc, vcpu->arch.shadow_msr);
+ printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n",
+ exit_nr, vcpu->arch.pc, vcpu->arch.shadow_srr1);
r = RESUME_HOST;
BUG();
break;
int i;
regs->pc = vcpu->arch.pc;
- regs->cr = vcpu->arch.cr;
+ regs->cr = kvmppc_get_cr(vcpu);
regs->ctr = vcpu->arch.ctr;
regs->lr = vcpu->arch.lr;
- regs->xer = vcpu->arch.xer;
+ regs->xer = kvmppc_get_xer(vcpu);
regs->msr = vcpu->arch.msr;
regs->srr0 = vcpu->arch.srr0;
regs->srr1 = vcpu->arch.srr1;
regs->sprg7 = vcpu->arch.sprg6;
for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
- regs->gpr[i] = vcpu->arch.gpr[i];
+ regs->gpr[i] = kvmppc_get_gpr(vcpu, i);
return 0;
}
int i;
vcpu->arch.pc = regs->pc;
- vcpu->arch.cr = regs->cr;
+ kvmppc_set_cr(vcpu, regs->cr);
vcpu->arch.ctr = regs->ctr;
vcpu->arch.lr = regs->lr;
- vcpu->arch.xer = regs->xer;
+ kvmppc_set_xer(vcpu, regs->xer);
kvmppc_set_msr(vcpu, regs->msr);
vcpu->arch.srr0 = regs->srr0;
vcpu->arch.srr1 = regs->srr1;
vcpu->arch.sprg6 = regs->sprg5;
vcpu->arch.sprg7 = regs->sprg6;
- for (i = 0; i < ARRAY_SIZE(vcpu->arch.gpr); i++)
- vcpu->arch.gpr[i] = regs->gpr[i];
+ for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
+ kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
return 0;
}
int is_dirty = 0;
int r, n;
- down_write(&kvm->slots_lock);
+ mutex_lock(&kvm->slots_lock);
r = kvm_get_dirty_log(kvm, log, &is_dirty);
if (r)
/* If nothing is dirty, don't bother messing with page tables. */
if (is_dirty) {
- memslot = &kvm->memslots[log->slot];
+ memslot = &kvm->memslots->memslots[log->slot];
ga = memslot->base_gfn << PAGE_SHIFT;
ga_end = ga + (memslot->npages << PAGE_SHIFT);
r = 0;
out:
- up_write(&kvm->slots_lock);
+ mutex_unlock(&kvm->slots_lock);
return r;
}
vcpu->arch.trampoline_lowmem = kvmppc_trampoline_lowmem;
vcpu->arch.trampoline_enter = kvmppc_trampoline_enter;
vcpu->arch.highmem_handler = (ulong)kvmppc_handler_highmem;
+ vcpu->arch.rmcall = *(ulong*)kvmppc_rmcall;
vcpu->arch.shadow_msr = MSR_USER64;
int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
{
int ret;
+ struct thread_struct ext_bkp;
+ bool save_vec = current->thread.used_vr;
+ bool save_vsx = current->thread.used_vsr;
+ ulong ext_msr;
/* No need to go into the guest when all we do is going out */
if (signal_pending(current)) {
return -EINTR;
}
+ /* Save FPU state in stack */
+ if (current->thread.regs->msr & MSR_FP)
+ giveup_fpu(current);
+ memcpy(ext_bkp.fpr, current->thread.fpr, sizeof(current->thread.fpr));
+ ext_bkp.fpscr = current->thread.fpscr;
+ ext_bkp.fpexc_mode = current->thread.fpexc_mode;
+
+#ifdef CONFIG_ALTIVEC
+ /* Save Altivec state in stack */
+ if (save_vec) {
+ if (current->thread.regs->msr & MSR_VEC)
+ giveup_altivec(current);
+ memcpy(ext_bkp.vr, current->thread.vr, sizeof(ext_bkp.vr));
+ ext_bkp.vscr = current->thread.vscr;
+ ext_bkp.vrsave = current->thread.vrsave;
+ }
+ ext_bkp.used_vr = current->thread.used_vr;
+#endif
+
+#ifdef CONFIG_VSX
+ /* Save VSX state in stack */
+ if (save_vsx && (current->thread.regs->msr & MSR_VSX))
+ __giveup_vsx(current);
+ ext_bkp.used_vsr = current->thread.used_vsr;
+#endif
+
+ /* Remember the MSR with disabled extensions */
+ ext_msr = current->thread.regs->msr;
+
/* XXX we get called with irq disabled - change that! */
local_irq_enable();
local_irq_disable();
+ current->thread.regs->msr = ext_msr;
+
+ /* Make sure we save the guest FPU/Altivec/VSX state */
+ kvmppc_giveup_ext(vcpu, MSR_FP);
+ kvmppc_giveup_ext(vcpu, MSR_VEC);
+ kvmppc_giveup_ext(vcpu, MSR_VSX);
+
+ /* Restore FPU state from stack */
+ memcpy(current->thread.fpr, ext_bkp.fpr, sizeof(ext_bkp.fpr));
+ current->thread.fpscr = ext_bkp.fpscr;
+ current->thread.fpexc_mode = ext_bkp.fpexc_mode;
+
+#ifdef CONFIG_ALTIVEC
+ /* Restore Altivec state from stack */
+ if (save_vec && current->thread.used_vr) {
+ memcpy(current->thread.vr, ext_bkp.vr, sizeof(ext_bkp.vr));
+ current->thread.vscr = ext_bkp.vscr;
+ current->thread.vrsave= ext_bkp.vrsave;
+ }
+ current->thread.used_vr = ext_bkp.used_vr;
+#endif
+
+#ifdef CONFIG_VSX
+ current->thread.used_vsr = ext_bkp.used_vsr;
+#endif
+
return ret;
}
case 31:
switch (get_xop(inst)) {
case OP_31_XOP_MFMSR:
- vcpu->arch.gpr[get_rt(inst)] = vcpu->arch.msr;
+ kvmppc_set_gpr(vcpu, get_rt(inst), vcpu->arch.msr);
break;
case OP_31_XOP_MTMSRD:
{
- ulong rs = vcpu->arch.gpr[get_rs(inst)];
+ ulong rs = kvmppc_get_gpr(vcpu, get_rs(inst));
if (inst & 0x10000) {
vcpu->arch.msr &= ~(MSR_RI | MSR_EE);
vcpu->arch.msr |= rs & (MSR_RI | MSR_EE);
break;
}
case OP_31_XOP_MTMSR:
- kvmppc_set_msr(vcpu, vcpu->arch.gpr[get_rs(inst)]);
+ kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, get_rs(inst)));
break;
case OP_31_XOP_MFSRIN:
{
int srnum;
- srnum = (vcpu->arch.gpr[get_rb(inst)] >> 28) & 0xf;
+ srnum = (kvmppc_get_gpr(vcpu, get_rb(inst)) >> 28) & 0xf;
if (vcpu->arch.mmu.mfsrin) {
u32 sr;
sr = vcpu->arch.mmu.mfsrin(vcpu, srnum);
- vcpu->arch.gpr[get_rt(inst)] = sr;
+ kvmppc_set_gpr(vcpu, get_rt(inst), sr);
}
break;
}
case OP_31_XOP_MTSRIN:
vcpu->arch.mmu.mtsrin(vcpu,
- (vcpu->arch.gpr[get_rb(inst)] >> 28) & 0xf,
- vcpu->arch.gpr[get_rs(inst)]);
+ (kvmppc_get_gpr(vcpu, get_rb(inst)) >> 28) & 0xf,
+ kvmppc_get_gpr(vcpu, get_rs(inst)));
break;
case OP_31_XOP_TLBIE:
case OP_31_XOP_TLBIEL:
{
bool large = (inst & 0x00200000) ? true : false;
- ulong addr = vcpu->arch.gpr[get_rb(inst)];
+ ulong addr = kvmppc_get_gpr(vcpu, get_rb(inst));
vcpu->arch.mmu.tlbie(vcpu, addr, large);
break;
}
if (!vcpu->arch.mmu.slbmte)
return EMULATE_FAIL;
- vcpu->arch.mmu.slbmte(vcpu, vcpu->arch.gpr[get_rs(inst)],
- vcpu->arch.gpr[get_rb(inst)]);
+ vcpu->arch.mmu.slbmte(vcpu,
+ kvmppc_get_gpr(vcpu, get_rs(inst)),
+ kvmppc_get_gpr(vcpu, get_rb(inst)));
break;
case OP_31_XOP_SLBIE:
if (!vcpu->arch.mmu.slbie)
return EMULATE_FAIL;
- vcpu->arch.mmu.slbie(vcpu, vcpu->arch.gpr[get_rb(inst)]);
+ vcpu->arch.mmu.slbie(vcpu,
+ kvmppc_get_gpr(vcpu, get_rb(inst)));
break;
case OP_31_XOP_SLBIA:
if (!vcpu->arch.mmu.slbia)
} else {
ulong t, rb;
- rb = vcpu->arch.gpr[get_rb(inst)];
+ rb = kvmppc_get_gpr(vcpu, get_rb(inst));
t = vcpu->arch.mmu.slbmfee(vcpu, rb);
- vcpu->arch.gpr[get_rt(inst)] = t;
+ kvmppc_set_gpr(vcpu, get_rt(inst), t);
}
break;
case OP_31_XOP_SLBMFEV:
} else {
ulong t, rb;
- rb = vcpu->arch.gpr[get_rb(inst)];
+ rb = kvmppc_get_gpr(vcpu, get_rb(inst));
t = vcpu->arch.mmu.slbmfev(vcpu, rb);
- vcpu->arch.gpr[get_rt(inst)] = t;
+ kvmppc_set_gpr(vcpu, get_rt(inst), t);
}
break;
case OP_31_XOP_DCBZ:
{
- ulong rb = vcpu->arch.gpr[get_rb(inst)];
+ ulong rb = kvmppc_get_gpr(vcpu, get_rb(inst));
ulong ra = 0;
ulong addr;
u32 zeros[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
if (get_ra(inst))
- ra = vcpu->arch.gpr[get_ra(inst)];
+ ra = kvmppc_get_gpr(vcpu, get_ra(inst));
addr = (ra + rb) & ~31ULL;
if (!(vcpu->arch.msr & MSR_SF))
int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
{
int emulated = EMULATE_DONE;
+ ulong spr_val = kvmppc_get_gpr(vcpu, rs);
switch (sprn) {
case SPRN_SDR1:
- to_book3s(vcpu)->sdr1 = vcpu->arch.gpr[rs];
+ to_book3s(vcpu)->sdr1 = spr_val;
break;
case SPRN_DSISR:
- to_book3s(vcpu)->dsisr = vcpu->arch.gpr[rs];
+ to_book3s(vcpu)->dsisr = spr_val;
break;
case SPRN_DAR:
- vcpu->arch.dear = vcpu->arch.gpr[rs];
+ vcpu->arch.dear = spr_val;
break;
case SPRN_HIOR:
- to_book3s(vcpu)->hior = vcpu->arch.gpr[rs];
+ to_book3s(vcpu)->hior = spr_val;
break;
case SPRN_IBAT0U ... SPRN_IBAT3L:
case SPRN_IBAT4U ... SPRN_IBAT7L:
case SPRN_DBAT0U ... SPRN_DBAT3L:
case SPRN_DBAT4U ... SPRN_DBAT7L:
- kvmppc_write_bat(vcpu, sprn, (u32)vcpu->arch.gpr[rs]);
+ kvmppc_write_bat(vcpu, sprn, (u32)spr_val);
/* BAT writes happen so rarely that we're ok to flush
* everything here */
kvmppc_mmu_pte_flush(vcpu, 0, 0);
break;
case SPRN_HID0:
- to_book3s(vcpu)->hid[0] = vcpu->arch.gpr[rs];
+ to_book3s(vcpu)->hid[0] = spr_val;
break;
case SPRN_HID1:
- to_book3s(vcpu)->hid[1] = vcpu->arch.gpr[rs];
+ to_book3s(vcpu)->hid[1] = spr_val;
break;
case SPRN_HID2:
- to_book3s(vcpu)->hid[2] = vcpu->arch.gpr[rs];
+ to_book3s(vcpu)->hid[2] = spr_val;
break;
case SPRN_HID4:
- to_book3s(vcpu)->hid[4] = vcpu->arch.gpr[rs];
+ to_book3s(vcpu)->hid[4] = spr_val;
break;
case SPRN_HID5:
- to_book3s(vcpu)->hid[5] = vcpu->arch.gpr[rs];
+ to_book3s(vcpu)->hid[5] = spr_val;
/* guest HID5 set can change is_dcbz32 */
if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
(mfmsr() & MSR_HV))
switch (sprn) {
case SPRN_SDR1:
- vcpu->arch.gpr[rt] = to_book3s(vcpu)->sdr1;
+ kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->sdr1);
break;
case SPRN_DSISR:
- vcpu->arch.gpr[rt] = to_book3s(vcpu)->dsisr;
+ kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->dsisr);
break;
case SPRN_DAR:
- vcpu->arch.gpr[rt] = vcpu->arch.dear;
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.dear);
break;
case SPRN_HIOR:
- vcpu->arch.gpr[rt] = to_book3s(vcpu)->hior;
+ kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hior);
break;
case SPRN_HID0:
- vcpu->arch.gpr[rt] = to_book3s(vcpu)->hid[0];
+ kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[0]);
break;
case SPRN_HID1:
- vcpu->arch.gpr[rt] = to_book3s(vcpu)->hid[1];
+ kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[1]);
break;
case SPRN_HID2:
- vcpu->arch.gpr[rt] = to_book3s(vcpu)->hid[2];
+ kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[2]);
break;
case SPRN_HID4:
- vcpu->arch.gpr[rt] = to_book3s(vcpu)->hid[4];
+ kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[4]);
break;
case SPRN_HID5:
- vcpu->arch.gpr[rt] = to_book3s(vcpu)->hid[5];
+ kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[5]);
break;
case SPRN_THRM1:
case SPRN_THRM2:
case SPRN_THRM3:
case SPRN_CTRLF:
case SPRN_CTRLT:
- vcpu->arch.gpr[rt] = 0;
+ kvmppc_set_gpr(vcpu, rt, 0);
break;
default:
printk(KERN_INFO "KVM: invalid SPR read: %d\n", sprn);
EXPORT_SYMBOL_GPL(kvmppc_trampoline_enter);
EXPORT_SYMBOL_GPL(kvmppc_trampoline_lowmem);
+EXPORT_SYMBOL_GPL(kvmppc_rmcall);
+EXPORT_SYMBOL_GPL(kvmppc_load_up_fpu);
+#ifdef CONFIG_ALTIVEC
+EXPORT_SYMBOL_GPL(kvmppc_load_up_altivec);
+#endif
+#ifdef CONFIG_VSX
+EXPORT_SYMBOL_GPL(kvmppc_load_up_vsx);
+#endif
#define ULONG_SIZE 8
#define VCPU_GPR(n) (VCPU_GPRS + (n * ULONG_SIZE))
-.macro mfpaca tmp_reg, src_reg, offset, vcpu_reg
- ld \tmp_reg, (PACA_EXMC+\offset)(r13)
- std \tmp_reg, VCPU_GPR(\src_reg)(\vcpu_reg)
-.endm
-
.macro DISABLE_INTERRUPTS
mfmsr r0
rldicl r0,r0,48,1
mtmsrd r0,1
.endm
+#define VCPU_LOAD_NVGPRS(vcpu) \
+ ld r14, VCPU_GPR(r14)(vcpu); \
+ ld r15, VCPU_GPR(r15)(vcpu); \
+ ld r16, VCPU_GPR(r16)(vcpu); \
+ ld r17, VCPU_GPR(r17)(vcpu); \
+ ld r18, VCPU_GPR(r18)(vcpu); \
+ ld r19, VCPU_GPR(r19)(vcpu); \
+ ld r20, VCPU_GPR(r20)(vcpu); \
+ ld r21, VCPU_GPR(r21)(vcpu); \
+ ld r22, VCPU_GPR(r22)(vcpu); \
+ ld r23, VCPU_GPR(r23)(vcpu); \
+ ld r24, VCPU_GPR(r24)(vcpu); \
+ ld r25, VCPU_GPR(r25)(vcpu); \
+ ld r26, VCPU_GPR(r26)(vcpu); \
+ ld r27, VCPU_GPR(r27)(vcpu); \
+ ld r28, VCPU_GPR(r28)(vcpu); \
+ ld r29, VCPU_GPR(r29)(vcpu); \
+ ld r30, VCPU_GPR(r30)(vcpu); \
+ ld r31, VCPU_GPR(r31)(vcpu); \
+
/*****************************************************************************
* *
* Guest entry / exit code that is in kernel module memory (highmem) *
SAVE_NVGPRS(r1)
/* Save LR */
- mflr r14
- std r14, _LINK(r1)
-
-/* XXX optimize non-volatile loading away */
-kvm_start_lightweight:
+ std r0, _LINK(r1)
- DISABLE_INTERRUPTS
+ /* Load non-volatile guest state from the vcpu */
+ VCPU_LOAD_NVGPRS(r4)
/* Save R1/R2 in the PACA */
- std r1, PACAR1(r13)
- std r2, (PACA_EXMC+EX_SRR0)(r13)
+ std r1, PACA_KVM_HOST_R1(r13)
+ std r2, PACA_KVM_HOST_R2(r13)
+
+ /* XXX swap in/out on load? */
ld r3, VCPU_HIGHMEM_HANDLER(r4)
- std r3, PACASAVEDMSR(r13)
+ std r3, PACA_KVM_VMHANDLER(r13)
- /* Load non-volatile guest state from the vcpu */
- ld r14, VCPU_GPR(r14)(r4)
- ld r15, VCPU_GPR(r15)(r4)
- ld r16, VCPU_GPR(r16)(r4)
- ld r17, VCPU_GPR(r17)(r4)
- ld r18, VCPU_GPR(r18)(r4)
- ld r19, VCPU_GPR(r19)(r4)
- ld r20, VCPU_GPR(r20)(r4)
- ld r21, VCPU_GPR(r21)(r4)
- ld r22, VCPU_GPR(r22)(r4)
- ld r23, VCPU_GPR(r23)(r4)
- ld r24, VCPU_GPR(r24)(r4)
- ld r25, VCPU_GPR(r25)(r4)
- ld r26, VCPU_GPR(r26)(r4)
- ld r27, VCPU_GPR(r27)(r4)
- ld r28, VCPU_GPR(r28)(r4)
- ld r29, VCPU_GPR(r29)(r4)
- ld r30, VCPU_GPR(r30)(r4)
- ld r31, VCPU_GPR(r31)(r4)
+kvm_start_lightweight:
ld r9, VCPU_PC(r4) /* r9 = vcpu->arch.pc */
ld r10, VCPU_SHADOW_MSR(r4) /* r10 = vcpu->arch.shadow_msr */
- ld r3, VCPU_TRAMPOLINE_ENTER(r4)
- mtsrr0 r3
-
- LOAD_REG_IMMEDIATE(r3, MSR_KERNEL & ~(MSR_IR | MSR_DR))
- mtsrr1 r3
-
- /* Load guest state in the respective registers */
- lwz r3, VCPU_CR(r4) /* r3 = vcpu->arch.cr */
- stw r3, (PACA_EXMC + EX_CCR)(r13)
-
- ld r3, VCPU_CTR(r4) /* r3 = vcpu->arch.ctr */
- mtctr r3 /* CTR = r3 */
+ /* Load some guest state in the respective registers */
+ ld r5, VCPU_CTR(r4) /* r5 = vcpu->arch.ctr */
+ /* will be swapped in by rmcall */
ld r3, VCPU_LR(r4) /* r3 = vcpu->arch.lr */
mtlr r3 /* LR = r3 */
- ld r3, VCPU_XER(r4) /* r3 = vcpu->arch.xer */
- std r3, (PACA_EXMC + EX_R3)(r13)
+ DISABLE_INTERRUPTS
/* Some guests may need to have dcbz set to 32 byte length.
*
mtspr SPRN_HID5,r3
no_dcbz32_on:
- /* Load guest GPRs */
-
- ld r3, VCPU_GPR(r9)(r4)
- std r3, (PACA_EXMC + EX_R9)(r13)
- ld r3, VCPU_GPR(r10)(r4)
- std r3, (PACA_EXMC + EX_R10)(r13)
- ld r3, VCPU_GPR(r11)(r4)
- std r3, (PACA_EXMC + EX_R11)(r13)
- ld r3, VCPU_GPR(r12)(r4)
- std r3, (PACA_EXMC + EX_R12)(r13)
- ld r3, VCPU_GPR(r13)(r4)
- std r3, (PACA_EXMC + EX_R13)(r13)
-
- ld r0, VCPU_GPR(r0)(r4)
- ld r1, VCPU_GPR(r1)(r4)
- ld r2, VCPU_GPR(r2)(r4)
- ld r3, VCPU_GPR(r3)(r4)
- ld r5, VCPU_GPR(r5)(r4)
- ld r6, VCPU_GPR(r6)(r4)
- ld r7, VCPU_GPR(r7)(r4)
- ld r8, VCPU_GPR(r8)(r4)
- ld r4, VCPU_GPR(r4)(r4)
-
- /* This sets the Magic value for the trampoline */
-
- li r11, 1
- stb r11, PACA_KVM_IN_GUEST(r13)
+
+ ld r6, VCPU_RMCALL(r4)
+ mtctr r6
+
+ ld r3, VCPU_TRAMPOLINE_ENTER(r4)
+ LOAD_REG_IMMEDIATE(r4, MSR_KERNEL & ~(MSR_IR | MSR_DR))
/* Jump to SLB patching handler and into our guest */
- RFI
+ bctr
/*
* This is the handler in module memory. It gets jumped at from the
/*
* Register usage at this point:
*
- * R00 = guest R13
- * R01 = host R1
- * R02 = host R2
- * R10 = guest PC
- * R11 = guest MSR
- * R12 = exit handler id
- * R13 = PACA
- * PACA.exmc.R9 = guest R1
- * PACA.exmc.R10 = guest R10
- * PACA.exmc.R11 = guest R11
- * PACA.exmc.R12 = guest R12
- * PACA.exmc.R13 = guest R2
- * PACA.exmc.DAR = guest DAR
- * PACA.exmc.DSISR = guest DSISR
- * PACA.exmc.LR = guest instruction
- * PACA.exmc.CCR = guest CR
- * PACA.exmc.SRR0 = guest R0
+ * R0 = guest last inst
+ * R1 = host R1
+ * R2 = host R2
+ * R3 = guest PC
+ * R4 = guest MSR
+ * R5 = guest DAR
+ * R6 = guest DSISR
+ * R13 = PACA
+ * PACA.KVM.* = guest *
*
*/
- std r3, (PACA_EXMC+EX_R3)(r13)
+ /* R7 = vcpu */
+ ld r7, GPR4(r1)
- /* save the exit id in R3 */
- mr r3, r12
+ /* Now save the guest state */
- /* R12 = vcpu */
- ld r12, GPR4(r1)
+ stw r0, VCPU_LAST_INST(r7)
- /* Now save the guest state */
+ std r3, VCPU_PC(r7)
+ std r4, VCPU_SHADOW_SRR1(r7)
+ std r5, VCPU_FAULT_DEAR(r7)
+ std r6, VCPU_FAULT_DSISR(r7)
- std r0, VCPU_GPR(r13)(r12)
- std r4, VCPU_GPR(r4)(r12)
- std r5, VCPU_GPR(r5)(r12)
- std r6, VCPU_GPR(r6)(r12)
- std r7, VCPU_GPR(r7)(r12)
- std r8, VCPU_GPR(r8)(r12)
- std r9, VCPU_GPR(r9)(r12)
-
- /* get registers from PACA */
- mfpaca r5, r0, EX_SRR0, r12
- mfpaca r5, r3, EX_R3, r12
- mfpaca r5, r1, EX_R9, r12
- mfpaca r5, r10, EX_R10, r12
- mfpaca r5, r11, EX_R11, r12
- mfpaca r5, r12, EX_R12, r12
- mfpaca r5, r2, EX_R13, r12
-
- lwz r5, (PACA_EXMC+EX_LR)(r13)
- stw r5, VCPU_LAST_INST(r12)
-
- lwz r5, (PACA_EXMC+EX_CCR)(r13)
- stw r5, VCPU_CR(r12)
-
- ld r5, VCPU_HFLAGS(r12)
+ ld r5, VCPU_HFLAGS(r7)
rldicl. r5, r5, 0, 63 /* CR = ((r5 & 1) == 0) */
beq no_dcbz32_off
+ li r4, 0
mfspr r5,SPRN_HID5
- rldimi r5,r5,6,56
+ rldimi r5,r4,6,56
mtspr SPRN_HID5,r5
no_dcbz32_off:
- /* XXX maybe skip on lightweight? */
- std r14, VCPU_GPR(r14)(r12)
- std r15, VCPU_GPR(r15)(r12)
- std r16, VCPU_GPR(r16)(r12)
- std r17, VCPU_GPR(r17)(r12)
- std r18, VCPU_GPR(r18)(r12)
- std r19, VCPU_GPR(r19)(r12)
- std r20, VCPU_GPR(r20)(r12)
- std r21, VCPU_GPR(r21)(r12)
- std r22, VCPU_GPR(r22)(r12)
- std r23, VCPU_GPR(r23)(r12)
- std r24, VCPU_GPR(r24)(r12)
- std r25, VCPU_GPR(r25)(r12)
- std r26, VCPU_GPR(r26)(r12)
- std r27, VCPU_GPR(r27)(r12)
- std r28, VCPU_GPR(r28)(r12)
- std r29, VCPU_GPR(r29)(r12)
- std r30, VCPU_GPR(r30)(r12)
- std r31, VCPU_GPR(r31)(r12)
-
- /* Restore non-volatile host registers (r14 - r31) */
- REST_NVGPRS(r1)
-
- /* Save guest PC (R10) */
- std r10, VCPU_PC(r12)
-
- /* Save guest msr (R11) */
- std r11, VCPU_SHADOW_MSR(r12)
-
- /* Save guest CTR (in R12) */
+ std r14, VCPU_GPR(r14)(r7)
+ std r15, VCPU_GPR(r15)(r7)
+ std r16, VCPU_GPR(r16)(r7)
+ std r17, VCPU_GPR(r17)(r7)
+ std r18, VCPU_GPR(r18)(r7)
+ std r19, VCPU_GPR(r19)(r7)
+ std r20, VCPU_GPR(r20)(r7)
+ std r21, VCPU_GPR(r21)(r7)
+ std r22, VCPU_GPR(r22)(r7)
+ std r23, VCPU_GPR(r23)(r7)
+ std r24, VCPU_GPR(r24)(r7)
+ std r25, VCPU_GPR(r25)(r7)
+ std r26, VCPU_GPR(r26)(r7)
+ std r27, VCPU_GPR(r27)(r7)
+ std r28, VCPU_GPR(r28)(r7)
+ std r29, VCPU_GPR(r29)(r7)
+ std r30, VCPU_GPR(r30)(r7)
+ std r31, VCPU_GPR(r31)(r7)
+
+ /* Save guest CTR */
mfctr r5
- std r5, VCPU_CTR(r12)
+ std r5, VCPU_CTR(r7)
/* Save guest LR */
mflr r5
- std r5, VCPU_LR(r12)
-
- /* Save guest XER */
- mfxer r5
- std r5, VCPU_XER(r12)
-
- /* Save guest DAR */
- ld r5, (PACA_EXMC+EX_DAR)(r13)
- std r5, VCPU_FAULT_DEAR(r12)
-
- /* Save guest DSISR */
- lwz r5, (PACA_EXMC+EX_DSISR)(r13)
- std r5, VCPU_FAULT_DSISR(r12)
+ std r5, VCPU_LR(r7)
/* Restore host msr -> SRR1 */
- ld r7, VCPU_HOST_MSR(r12)
- mtsrr1 r7
-
- /* Restore host IP -> SRR0 */
- ld r6, VCPU_HOST_RETIP(r12)
- mtsrr0 r6
+ ld r6, VCPU_HOST_MSR(r7)
/*
* For some interrupts, we need to call the real Linux
* r3 = address of interrupt handler (exit reason)
*/
- cmpwi r3, BOOK3S_INTERRUPT_EXTERNAL
+ cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
beq call_linux_handler
- cmpwi r3, BOOK3S_INTERRUPT_DECREMENTER
+ cmpwi r12, BOOK3S_INTERRUPT_DECREMENTER
beq call_linux_handler
- /* Back to Interruptable Mode! (goto kvm_return_point) */
- RFI
+ /* Back to EE=1 */
+ mtmsr r6
+ b kvm_return_point
call_linux_handler:
* interrupt handler!
*
* R3 still contains the exit code,
- * R6 VCPU_HOST_RETIP and
- * R7 VCPU_HOST_MSR
+ * R5 VCPU_HOST_RETIP and
+ * R6 VCPU_HOST_MSR
*/
- mtlr r3
+ /* Restore host IP -> SRR0 */
+ ld r5, VCPU_HOST_RETIP(r7)
+
+ /* XXX Better move to a safe function?
+ * What if we get an HTAB flush in between mtsrr0 and mtsrr1? */
- ld r5, VCPU_TRAMPOLINE_LOWMEM(r12)
- mtsrr0 r5
- LOAD_REG_IMMEDIATE(r5, MSR_KERNEL & ~(MSR_IR | MSR_DR))
- mtsrr1 r5
+ mtlr r12
+
+ ld r4, VCPU_TRAMPOLINE_LOWMEM(r7)
+ mtsrr0 r4
+ LOAD_REG_IMMEDIATE(r3, MSR_KERNEL & ~(MSR_IR | MSR_DR))
+ mtsrr1 r3
RFI
/* Jump back to lightweight entry if we're supposed to */
/* go back into the guest */
- mr r5, r3
+
+ /* Pass the exit number as 3rd argument to kvmppc_handle_exit */
+ mr r5, r12
+
/* Restore r3 (kvm_run) and r4 (vcpu) */
REST_2GPRS(3, r1)
bl KVMPPC_HANDLE_EXIT
-#if 0 /* XXX get lightweight exits back */
+ /* If RESUME_GUEST, get back in the loop */
cmpwi r3, RESUME_GUEST
- bne kvm_exit_heavyweight
+ beq kvm_loop_lightweight
- /* put VCPU and KVM_RUN back into place and roll again! */
- REST_2GPRS(3, r1)
- b kvm_start_lightweight
+ cmpwi r3, RESUME_GUEST_NV
+ beq kvm_loop_heavyweight
-kvm_exit_heavyweight:
- /* Restore non-volatile host registers */
- ld r14, _LINK(r1)
- mtlr r14
- REST_NVGPRS(r1)
+kvm_exit_loop:
- addi r1, r1, SWITCH_FRAME_SIZE
-#else
ld r4, _LINK(r1)
mtlr r4
- cmpwi r3, RESUME_GUEST
- bne kvm_exit_heavyweight
+ /* Restore non-volatile host registers (r14 - r31) */
+ REST_NVGPRS(r1)
+
+ addi r1, r1, SWITCH_FRAME_SIZE
+ blr
+
+kvm_loop_heavyweight:
+
+ ld r4, _LINK(r1)
+ std r4, (16 + SWITCH_FRAME_SIZE)(r1)
+ /* Load vcpu and cpu_run */
REST_2GPRS(3, r1)
- addi r1, r1, SWITCH_FRAME_SIZE
+ /* Load non-volatile guest state from the vcpu */
+ VCPU_LOAD_NVGPRS(r4)
- b kvm_start_entry
+ /* Jump back into the beginning of this function */
+ b kvm_start_lightweight
-kvm_exit_heavyweight:
+kvm_loop_lightweight:
- addi r1, r1, SWITCH_FRAME_SIZE
-#endif
+ /* We'll need the vcpu pointer */
+ REST_GPR(4, r1)
+
+ /* Jump back into the beginning of this function */
+ b kvm_start_lightweight
- blr
if (!vcpu_book3s->slb[i].valid)
continue;
- if (vcpu_book3s->slb[i].large)
+ if (vcpu_book3s->slb[i].tb)
cmp_esid = esid_1t;
if (vcpu_book3s->slb[i].esid == cmp_esid)
eaddr, esid, esid_1t);
for (i = 0; i < vcpu_book3s->slb_nr; i++) {
if (vcpu_book3s->slb[i].vsid)
- dprintk(" %d: %c%c %llx %llx\n", i,
+ dprintk(" %d: %c%c%c %llx %llx\n", i,
vcpu_book3s->slb[i].valid ? 'v' : ' ',
vcpu_book3s->slb[i].large ? 'l' : ' ',
+ vcpu_book3s->slb[i].tb ? 't' : ' ',
vcpu_book3s->slb[i].esid,
vcpu_book3s->slb[i].vsid);
}
if (!slb)
return 0;
- if (slb->large)
+ if (slb->tb)
return (((u64)eaddr >> 12) & 0xfffffff) |
(((u64)slb->vsid) << 28);
slbe = &vcpu_book3s->slb[slb_nr];
slbe->large = (rs & SLB_VSID_L) ? 1 : 0;
- slbe->esid = slbe->large ? esid_1t : esid;
+ slbe->tb = (rs & SLB_VSID_B_1T) ? 1 : 0;
+ slbe->esid = slbe->tb ? esid_1t : esid;
slbe->vsid = rs >> 12;
slbe->valid = (rb & SLB_ESID_V) ? 1 : 0;
slbe->Ks = (rs & SLB_VSID_KS) ? 1 : 0;
* To distinguish, we check a magic byte in the PACA
*/
mfspr r13, SPRN_SPRG_PACA /* r13 = PACA */
- std r12, (PACA_EXMC + EX_R12)(r13)
+ std r12, PACA_KVM_SCRATCH0(r13)
mfcr r12
- stw r12, (PACA_EXMC + EX_CCR)(r13)
+ stw r12, PACA_KVM_SCRATCH1(r13)
lbz r12, PACA_KVM_IN_GUEST(r13)
- cmpwi r12, 0
+ cmpwi r12, KVM_GUEST_MODE_NONE
bne ..kvmppc_handler_hasmagic_\intno
/* No KVM guest? Then jump back to the Linux handler! */
- lwz r12, (PACA_EXMC + EX_CCR)(r13)
+ lwz r12, PACA_KVM_SCRATCH1(r13)
mtcr r12
- ld r12, (PACA_EXMC + EX_R12)(r13)
+ ld r12, PACA_KVM_SCRATCH0(r13)
mfspr r13, SPRN_SPRG_SCRATCH0 /* r13 = original r13 */
b kvmppc_resume_\intno /* Get back original handler */
/* Now we know we're handling a KVM guest */
..kvmppc_handler_hasmagic_\intno:
- /* Unset guest state */
- li r12, 0
- stb r12, PACA_KVM_IN_GUEST(r13)
- std r1, (PACA_EXMC+EX_R9)(r13)
- std r10, (PACA_EXMC+EX_R10)(r13)
- std r11, (PACA_EXMC+EX_R11)(r13)
- std r2, (PACA_EXMC+EX_R13)(r13)
-
- mfsrr0 r10
- mfsrr1 r11
-
- /* Restore R1/R2 so we can handle faults */
- ld r1, PACAR1(r13)
- ld r2, (PACA_EXMC+EX_SRR0)(r13)
+ /* Should we just skip the faulting instruction? */
+ cmpwi r12, KVM_GUEST_MODE_SKIP
+ beq kvmppc_handler_skip_ins
/* Let's store which interrupt we're handling */
li r12, \intno
INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_ALTIVEC
INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_VSX
+/*
+ * Bring us back to the faulting code, but skip the
+ * faulting instruction.
+ *
+ * This is a generic exit path from the interrupt
+ * trampolines above.
+ *
+ * Input Registers:
+ *
+ * R12 = free
+ * R13 = PACA
+ * PACA.KVM.SCRATCH0 = guest R12
+ * PACA.KVM.SCRATCH1 = guest CR
+ * SPRG_SCRATCH0 = guest R13
+ *
+ */
+kvmppc_handler_skip_ins:
+
+ /* Patch the IP to the next instruction */
+ mfsrr0 r12
+ addi r12, r12, 4
+ mtsrr0 r12
+
+ /* Clean up all state */
+ lwz r12, PACA_KVM_SCRATCH1(r13)
+ mtcr r12
+ ld r12, PACA_KVM_SCRATCH0(r13)
+ mfspr r13, SPRN_SPRG_SCRATCH0
+
+ /* And get back into the code */
+ RFI
+
/*
* This trampoline brings us back to a real mode handler
*
* Input Registers:
*
- * R6 = SRR0
- * R7 = SRR1
+ * R5 = SRR0
+ * R6 = SRR1
* LR = real-mode IP
*
*/
.global kvmppc_handler_lowmem_trampoline
kvmppc_handler_lowmem_trampoline:
- mtsrr0 r6
- mtsrr1 r7
+ mtsrr0 r5
+ mtsrr1 r6
blr
kvmppc_handler_lowmem_trampoline_end:
+/*
+ * Call a function in real mode
+ *
+ * Input Registers:
+ *
+ * R3 = function
+ * R4 = MSR
+ * R5 = CTR
+ *
+ */
+_GLOBAL(kvmppc_rmcall)
+ mtmsr r4 /* Disable relocation, so mtsrr
+ doesn't get interrupted */
+ mtctr r5
+ mtsrr0 r3
+ mtsrr1 r4
+ RFI
+
+/*
+ * Activate current's external feature (FPU/Altivec/VSX)
+ */
+#define define_load_up(what) \
+ \
+_GLOBAL(kvmppc_load_up_ ## what); \
+ subi r1, r1, INT_FRAME_SIZE; \
+ mflr r3; \
+ std r3, _LINK(r1); \
+ mfmsr r4; \
+ std r31, GPR3(r1); \
+ mr r31, r4; \
+ li r5, MSR_DR; \
+ oris r5, r5, MSR_EE@h; \
+ andc r4, r4, r5; \
+ mtmsr r4; \
+ \
+ bl .load_up_ ## what; \
+ \
+ mtmsr r31; \
+ ld r3, _LINK(r1); \
+ ld r31, GPR3(r1); \
+ addi r1, r1, INT_FRAME_SIZE; \
+ mtlr r3; \
+ blr
+
+define_load_up(fpu)
+#ifdef CONFIG_ALTIVEC
+define_load_up(altivec)
+#endif
+#ifdef CONFIG_VSX
+define_load_up(vsx)
+#endif
+
.global kvmppc_trampoline_lowmem
kvmppc_trampoline_lowmem:
.long kvmppc_handler_lowmem_trampoline - _stext
#define REBOLT_SLB_ENTRY(num) \
ld r10, SHADOW_SLB_ESID(num)(r11); \
cmpdi r10, 0; \
- beq slb_exit_skip_1; \
+ beq slb_exit_skip_ ## num; \
oris r10, r10, SLB_ESID_V@h; \
ld r9, SHADOW_SLB_VSID(num)(r11); \
slbmte r9, r10; \
*
* MSR = ~IR|DR
* R13 = PACA
+ * R1 = host R1
+ * R2 = host R2
* R9 = guest IP
* R10 = guest MSR
- * R11 = free
- * R12 = free
- * PACA[PACA_EXMC + EX_R9] = guest R9
- * PACA[PACA_EXMC + EX_R10] = guest R10
- * PACA[PACA_EXMC + EX_R11] = guest R11
- * PACA[PACA_EXMC + EX_R12] = guest R12
- * PACA[PACA_EXMC + EX_R13] = guest R13
- * PACA[PACA_EXMC + EX_CCR] = guest CR
- * PACA[PACA_EXMC + EX_R3] = guest XER
+ * all other GPRS = free
+ * PACA[KVM_CR] = guest CR
+ * PACA[KVM_XER] = guest XER
*/
mtsrr0 r9
mtsrr1 r10
- mtspr SPRN_SPRG_SCRATCH0, r0
+ /* Activate guest mode, so faults get handled by KVM */
+ li r11, KVM_GUEST_MODE_GUEST
+ stb r11, PACA_KVM_IN_GUEST(r13)
/* Remove LPAR shadow entries */
/* Enter guest */
- mfspr r0, SPRN_SPRG_SCRATCH0
-
- ld r9, (PACA_EXMC+EX_R9)(r13)
- ld r10, (PACA_EXMC+EX_R10)(r13)
- ld r12, (PACA_EXMC+EX_R12)(r13)
-
- lwz r11, (PACA_EXMC+EX_CCR)(r13)
+ ld r0, (PACA_KVM_R0)(r13)
+ ld r1, (PACA_KVM_R1)(r13)
+ ld r2, (PACA_KVM_R2)(r13)
+ ld r3, (PACA_KVM_R3)(r13)
+ ld r4, (PACA_KVM_R4)(r13)
+ ld r5, (PACA_KVM_R5)(r13)
+ ld r6, (PACA_KVM_R6)(r13)
+ ld r7, (PACA_KVM_R7)(r13)
+ ld r8, (PACA_KVM_R8)(r13)
+ ld r9, (PACA_KVM_R9)(r13)
+ ld r10, (PACA_KVM_R10)(r13)
+ ld r12, (PACA_KVM_R12)(r13)
+
+ lwz r11, (PACA_KVM_CR)(r13)
mtcr r11
- ld r11, (PACA_EXMC+EX_R3)(r13)
+ ld r11, (PACA_KVM_XER)(r13)
mtxer r11
- ld r11, (PACA_EXMC+EX_R11)(r13)
- ld r13, (PACA_EXMC+EX_R13)(r13)
+ ld r11, (PACA_KVM_R11)(r13)
+ ld r13, (PACA_KVM_R13)(r13)
RFI
kvmppc_handler_trampoline_enter_end:
/* Register usage at this point:
*
- * SPRG_SCRATCH0 = guest R13
- * R01 = host R1
- * R02 = host R2
- * R10 = guest PC
- * R11 = guest MSR
- * R12 = exit handler id
- * R13 = PACA
- * PACA.exmc.CCR = guest CR
- * PACA.exmc.R9 = guest R1
- * PACA.exmc.R10 = guest R10
- * PACA.exmc.R11 = guest R11
- * PACA.exmc.R12 = guest R12
- * PACA.exmc.R13 = guest R2
+ * SPRG_SCRATCH0 = guest R13
+ * R12 = exit handler id
+ * R13 = PACA
+ * PACA.KVM.SCRATCH0 = guest R12
+ * PACA.KVM.SCRATCH1 = guest CR
*
*/
/* Save registers */
- std r0, (PACA_EXMC+EX_SRR0)(r13)
- std r9, (PACA_EXMC+EX_R3)(r13)
- std r10, (PACA_EXMC+EX_LR)(r13)
- std r11, (PACA_EXMC+EX_DAR)(r13)
+ std r0, PACA_KVM_R0(r13)
+ std r1, PACA_KVM_R1(r13)
+ std r2, PACA_KVM_R2(r13)
+ std r3, PACA_KVM_R3(r13)
+ std r4, PACA_KVM_R4(r13)
+ std r5, PACA_KVM_R5(r13)
+ std r6, PACA_KVM_R6(r13)
+ std r7, PACA_KVM_R7(r13)
+ std r8, PACA_KVM_R8(r13)
+ std r9, PACA_KVM_R9(r13)
+ std r10, PACA_KVM_R10(r13)
+ std r11, PACA_KVM_R11(r13)
+
+ /* Restore R1/R2 so we can handle faults */
+ ld r1, PACA_KVM_HOST_R1(r13)
+ ld r2, PACA_KVM_HOST_R2(r13)
+
+ /* Save guest PC and MSR in GPRs */
+ mfsrr0 r3
+ mfsrr1 r4
+
+ /* Get scratch'ed off registers */
+ mfspr r9, SPRN_SPRG_SCRATCH0
+ std r9, PACA_KVM_R13(r13)
+
+ ld r8, PACA_KVM_SCRATCH0(r13)
+ std r8, PACA_KVM_R12(r13)
+
+ lwz r7, PACA_KVM_SCRATCH1(r13)
+ stw r7, PACA_KVM_CR(r13)
+
+ /* Save more register state */
+
+ mfxer r6
+ stw r6, PACA_KVM_XER(r13)
+
+ mfdar r5
+ mfdsisr r6
/*
* In order for us to easily get the last instruction,
ld_last_inst:
/* Save off the guest instruction we're at */
+
+ /* Set guest mode to 'jump over instruction' so if lwz faults
+ * we'll just continue at the next IP. */
+ li r9, KVM_GUEST_MODE_SKIP
+ stb r9, PACA_KVM_IN_GUEST(r13)
+
/* 1) enable paging for data */
mfmsr r9
ori r11, r9, MSR_DR /* Enable paging for data */
mtmsr r11
/* 2) fetch the instruction */
- lwz r0, 0(r10)
+ li r0, KVM_INST_FETCH_FAILED /* In case lwz faults */
+ lwz r0, 0(r3)
/* 3) disable paging again */
mtmsr r9
no_ld_last_inst:
+ /* Unset guest mode */
+ li r9, KVM_GUEST_MODE_NONE
+ stb r9, PACA_KVM_IN_GUEST(r13)
+
/* Restore bolted entries from the shadow and fix it along the way */
/* We don't store anything in entry 0, so we don't need to take care of it */
slb_do_exit:
- /* Restore registers */
-
- ld r11, (PACA_EXMC+EX_DAR)(r13)
- ld r10, (PACA_EXMC+EX_LR)(r13)
- ld r9, (PACA_EXMC+EX_R3)(r13)
-
- /* Save last inst */
- stw r0, (PACA_EXMC+EX_LR)(r13)
-
- /* Save DAR and DSISR before going to paged mode */
- mfdar r0
- std r0, (PACA_EXMC+EX_DAR)(r13)
- mfdsisr r0
- stw r0, (PACA_EXMC+EX_DSISR)(r13)
+ /* Register usage at this point:
+ *
+ * R0 = guest last inst
+ * R1 = host R1
+ * R2 = host R2
+ * R3 = guest PC
+ * R4 = guest MSR
+ * R5 = guest DAR
+ * R6 = guest DSISR
+ * R12 = exit handler id
+ * R13 = PACA
+ * PACA.KVM.* = guest *
+ *
+ */
/* RFI into the highmem handler */
- mfmsr r0
- ori r0, r0, MSR_IR|MSR_DR|MSR_RI /* Enable paging */
- mtsrr1 r0
- ld r0, PACASAVEDMSR(r13) /* Highmem handler address */
- mtsrr0 r0
-
- mfspr r0, SPRN_SPRG_SCRATCH0
+ mfmsr r7
+ ori r7, r7, MSR_IR|MSR_DR|MSR_RI /* Enable paging */
+ mtsrr1 r7
+ ld r8, PACA_KVM_VMHANDLER(r13) /* Highmem handler address */
+ mtsrr0 r8
RFI
kvmppc_handler_trampoline_exit_end:
for (i = 0; i < 32; i += 4) {
printk("gpr%02d: %08lx %08lx %08lx %08lx\n", i,
- vcpu->arch.gpr[i],
- vcpu->arch.gpr[i+1],
- vcpu->arch.gpr[i+2],
- vcpu->arch.gpr[i+3]);
+ kvmppc_get_gpr(vcpu, i),
+ kvmppc_get_gpr(vcpu, i+1),
+ kvmppc_get_gpr(vcpu, i+2),
+ kvmppc_get_gpr(vcpu, i+3));
}
}
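The hunk above is the first of many call sites in this series converted from direct vcpu->arch.gpr[] accesses to kvmppc_get_gpr()/kvmppc_set_gpr(). A minimal sketch of that accessor pattern, using a stand-in struct rather than the kernel's:

#include <stdint.h>
#include <stdio.h>

struct toy_vcpu_arch { uint64_t gpr[32]; };

static inline uint64_t toy_get_gpr(struct toy_vcpu_arch *a, int num)
{
	return a->gpr[num];
}

static inline void toy_set_gpr(struct toy_vcpu_arch *a, int num, uint64_t val)
{
	a->gpr[num] = val;
}

int main(void)
{
	struct toy_vcpu_arch arch = { { 0 } };

	toy_set_gpr(&arch, 1, (16 << 20) - 8);	/* same initial r1 value used later in this series */
	printf("r1 = 0x%llx\n", (unsigned long long)toy_get_gpr(&arch, 1));
	return 0;
}

Call sites never touch the array directly, so the backing storage (for instance a shared register cache) can change later without another tree-wide sweep.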
set_bit(priority, &vcpu->arch.pending_exceptions);
}
-void kvmppc_core_queue_program(struct kvm_vcpu *vcpu)
+static void kvmppc_core_queue_dtlb_miss(struct kvm_vcpu *vcpu,
+ ulong dear_flags, ulong esr_flags)
{
+ vcpu->arch.queued_dear = dear_flags;
+ vcpu->arch.queued_esr = esr_flags;
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS);
+}
+
+static void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu,
+ ulong dear_flags, ulong esr_flags)
+{
+ vcpu->arch.queued_dear = dear_flags;
+ vcpu->arch.queued_esr = esr_flags;
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE);
+}
+
+static void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu,
+ ulong esr_flags)
+{
+ vcpu->arch.queued_esr = esr_flags;
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE);
+}
+
+void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong esr_flags)
+{
+ vcpu->arch.queued_esr = esr_flags;
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM);
}
return test_bit(BOOKE_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions);
}
+void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu)
+{
+ clear_bit(BOOKE_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions);
+}
+
void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
struct kvm_interrupt *irq)
{
{
int allowed = 0;
ulong msr_mask;
+ bool update_esr = false, update_dear = false;
switch (priority) {
- case BOOKE_IRQPRIO_PROGRAM:
case BOOKE_IRQPRIO_DTLB_MISS:
- case BOOKE_IRQPRIO_ITLB_MISS:
- case BOOKE_IRQPRIO_SYSCALL:
case BOOKE_IRQPRIO_DATA_STORAGE:
+ update_dear = true;
+ /* fall through */
case BOOKE_IRQPRIO_INST_STORAGE:
+ case BOOKE_IRQPRIO_PROGRAM:
+ update_esr = true;
+ /* fall through */
+ case BOOKE_IRQPRIO_ITLB_MISS:
+ case BOOKE_IRQPRIO_SYSCALL:
case BOOKE_IRQPRIO_FP_UNAVAIL:
case BOOKE_IRQPRIO_SPE_UNAVAIL:
case BOOKE_IRQPRIO_SPE_FP_DATA:
vcpu->arch.srr0 = vcpu->arch.pc;
vcpu->arch.srr1 = vcpu->arch.msr;
vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority];
+ if (update_esr == true)
+ vcpu->arch.esr = vcpu->arch.queued_esr;
+ if (update_dear == true)
+ vcpu->arch.dear = vcpu->arch.queued_dear;
kvmppc_set_msr(vcpu, vcpu->arch.msr & msr_mask);
clear_bit(priority, &vcpu->arch.pending_exceptions);
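A short sketch of the queue-then-deliver pattern the hunks above introduce: fault details are staged in queued_esr/queued_dear when an exception is queued and only copied into the architected ESR/DEAR when that priority is actually delivered. The struct and function below are illustrative stand-ins.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct toy_vcpu {
	uint32_t esr, dear;			/* guest-visible registers */
	uint32_t queued_esr, queued_dear;	/* staged at queue time */
};

static void toy_deliver(struct toy_vcpu *v, bool update_esr, bool update_dear)
{
	if (update_esr)
		v->esr = v->queued_esr;
	if (update_dear)
		v->dear = v->queued_dear;
}

int main(void)
{
	struct toy_vcpu v = { .queued_esr = 0x2, .queued_dear = 0x1000 };

	toy_deliver(&v, true, true);
	printf("esr=%x dear=%x\n", v.esr, v.dear);
	return 0;
}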
if (vcpu->arch.msr & MSR_PR) {
/* Program traps generated by user-level software must be handled
* by the guest kernel. */
- vcpu->arch.esr = vcpu->arch.fault_esr;
- kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM);
+ kvmppc_core_queue_program(vcpu, vcpu->arch.fault_esr);
r = RESUME_GUEST;
kvmppc_account_exit(vcpu, USR_PR_INST);
break;
break;
case BOOKE_INTERRUPT_DATA_STORAGE:
- vcpu->arch.dear = vcpu->arch.fault_dear;
- vcpu->arch.esr = vcpu->arch.fault_esr;
- kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE);
+ kvmppc_core_queue_data_storage(vcpu, vcpu->arch.fault_dear,
+ vcpu->arch.fault_esr);
kvmppc_account_exit(vcpu, DSI_EXITS);
r = RESUME_GUEST;
break;
case BOOKE_INTERRUPT_INST_STORAGE:
- vcpu->arch.esr = vcpu->arch.fault_esr;
- kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE);
+ kvmppc_core_queue_inst_storage(vcpu, vcpu->arch.fault_esr);
kvmppc_account_exit(vcpu, ISI_EXITS);
r = RESUME_GUEST;
break;
gtlb_index = kvmppc_mmu_dtlb_index(vcpu, eaddr);
if (gtlb_index < 0) {
/* The guest didn't have a mapping for it. */
- kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS);
- vcpu->arch.dear = vcpu->arch.fault_dear;
- vcpu->arch.esr = vcpu->arch.fault_esr;
+ kvmppc_core_queue_dtlb_miss(vcpu,
+ vcpu->arch.fault_dear,
+ vcpu->arch.fault_esr);
kvmppc_mmu_dtlb_miss(vcpu);
kvmppc_account_exit(vcpu, DTLB_REAL_MISS_EXITS);
r = RESUME_GUEST;
{
vcpu->arch.pc = 0;
vcpu->arch.msr = 0;
- vcpu->arch.gpr[1] = (16<<20) - 8; /* -8 for the callee-save LR slot */
+ kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */
vcpu->arch.shadow_pid = 1;
int i;
regs->pc = vcpu->arch.pc;
- regs->cr = vcpu->arch.cr;
+ regs->cr = kvmppc_get_cr(vcpu);
regs->ctr = vcpu->arch.ctr;
regs->lr = vcpu->arch.lr;
- regs->xer = vcpu->arch.xer;
+ regs->xer = kvmppc_get_xer(vcpu);
regs->msr = vcpu->arch.msr;
regs->srr0 = vcpu->arch.srr0;
regs->srr1 = vcpu->arch.srr1;
regs->sprg7 = vcpu->arch.sprg6;
for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
- regs->gpr[i] = vcpu->arch.gpr[i];
+ regs->gpr[i] = kvmppc_get_gpr(vcpu, i);
return 0;
}
int i;
vcpu->arch.pc = regs->pc;
- vcpu->arch.cr = regs->cr;
+ kvmppc_set_cr(vcpu, regs->cr);
vcpu->arch.ctr = regs->ctr;
vcpu->arch.lr = regs->lr;
- vcpu->arch.xer = regs->xer;
+ kvmppc_set_xer(vcpu, regs->xer);
kvmppc_set_msr(vcpu, regs->msr);
vcpu->arch.srr0 = regs->srr0;
vcpu->arch.srr1 = regs->srr1;
vcpu->arch.sprg6 = regs->sprg5;
vcpu->arch.sprg7 = regs->sprg6;
- for (i = 0; i < ARRAY_SIZE(vcpu->arch.gpr); i++)
- vcpu->arch.gpr[i] = regs->gpr[i];
+ for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
+ kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
return 0;
}
case OP_31_XOP_MFMSR:
rt = get_rt(inst);
- vcpu->arch.gpr[rt] = vcpu->arch.msr;
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.msr);
kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS);
break;
case OP_31_XOP_MTMSR:
rs = get_rs(inst);
kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS);
- kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]);
+ kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, rs));
break;
case OP_31_XOP_WRTEE:
rs = get_rs(inst);
vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
- | (vcpu->arch.gpr[rs] & MSR_EE);
+ | (kvmppc_get_gpr(vcpu, rs) & MSR_EE);
kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
break;
int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
{
int emulated = EMULATE_DONE;
+ ulong spr_val = kvmppc_get_gpr(vcpu, rs);
switch (sprn) {
case SPRN_DEAR:
- vcpu->arch.dear = vcpu->arch.gpr[rs]; break;
+ vcpu->arch.dear = spr_val; break;
case SPRN_ESR:
- vcpu->arch.esr = vcpu->arch.gpr[rs]; break;
+ vcpu->arch.esr = spr_val; break;
case SPRN_DBCR0:
- vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break;
+ vcpu->arch.dbcr0 = spr_val; break;
case SPRN_DBCR1:
- vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break;
+ vcpu->arch.dbcr1 = spr_val; break;
case SPRN_DBSR:
- vcpu->arch.dbsr &= ~vcpu->arch.gpr[rs]; break;
+ vcpu->arch.dbsr &= ~spr_val; break;
case SPRN_TSR:
- vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break;
+ vcpu->arch.tsr &= ~spr_val; break;
case SPRN_TCR:
- vcpu->arch.tcr = vcpu->arch.gpr[rs];
+ vcpu->arch.tcr = spr_val;
kvmppc_emulate_dec(vcpu);
break;
* loaded into the real SPRGs when resuming the
* guest. */
case SPRN_SPRG4:
- vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break;
+ vcpu->arch.sprg4 = spr_val; break;
case SPRN_SPRG5:
- vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break;
+ vcpu->arch.sprg5 = spr_val; break;
case SPRN_SPRG6:
- vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break;
+ vcpu->arch.sprg6 = spr_val; break;
case SPRN_SPRG7:
- vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break;
+ vcpu->arch.sprg7 = spr_val; break;
case SPRN_IVPR:
- vcpu->arch.ivpr = vcpu->arch.gpr[rs];
+ vcpu->arch.ivpr = spr_val;
break;
case SPRN_IVOR0:
- vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = vcpu->arch.gpr[rs];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = spr_val;
break;
case SPRN_IVOR1:
- vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = vcpu->arch.gpr[rs];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = spr_val;
break;
case SPRN_IVOR2:
- vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = vcpu->arch.gpr[rs];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = spr_val;
break;
case SPRN_IVOR3:
- vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = vcpu->arch.gpr[rs];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = spr_val;
break;
case SPRN_IVOR4:
- vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = vcpu->arch.gpr[rs];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = spr_val;
break;
case SPRN_IVOR5:
- vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = vcpu->arch.gpr[rs];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = spr_val;
break;
case SPRN_IVOR6:
- vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = vcpu->arch.gpr[rs];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = spr_val;
break;
case SPRN_IVOR7:
- vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = vcpu->arch.gpr[rs];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = spr_val;
break;
case SPRN_IVOR8:
- vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = vcpu->arch.gpr[rs];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = spr_val;
break;
case SPRN_IVOR9:
- vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = vcpu->arch.gpr[rs];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = spr_val;
break;
case SPRN_IVOR10:
- vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = vcpu->arch.gpr[rs];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = spr_val;
break;
case SPRN_IVOR11:
- vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = vcpu->arch.gpr[rs];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = spr_val;
break;
case SPRN_IVOR12:
- vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = vcpu->arch.gpr[rs];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = spr_val;
break;
case SPRN_IVOR13:
- vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = vcpu->arch.gpr[rs];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = spr_val;
break;
case SPRN_IVOR14:
- vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = vcpu->arch.gpr[rs];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = spr_val;
break;
case SPRN_IVOR15:
- vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = vcpu->arch.gpr[rs];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = spr_val;
break;
default:
switch (sprn) {
case SPRN_IVPR:
- vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break;
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivpr); break;
case SPRN_DEAR:
- vcpu->arch.gpr[rt] = vcpu->arch.dear; break;
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.dear); break;
case SPRN_ESR:
- vcpu->arch.gpr[rt] = vcpu->arch.esr; break;
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.esr); break;
case SPRN_DBCR0:
- vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break;
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr0); break;
case SPRN_DBCR1:
- vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break;
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr1); break;
case SPRN_DBSR:
- vcpu->arch.gpr[rt] = vcpu->arch.dbsr; break;
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbsr); break;
case SPRN_IVOR0:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL];
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]);
break;
case SPRN_IVOR1:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK];
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK]);
break;
case SPRN_IVOR2:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE];
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]);
break;
case SPRN_IVOR3:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE];
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE]);
break;
case SPRN_IVOR4:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL];
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL]);
break;
case SPRN_IVOR5:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT];
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT]);
break;
case SPRN_IVOR6:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM];
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM]);
break;
case SPRN_IVOR7:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL];
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL]);
break;
case SPRN_IVOR8:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL];
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]);
break;
case SPRN_IVOR9:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL];
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL]);
break;
case SPRN_IVOR10:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER];
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER]);
break;
case SPRN_IVOR11:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT];
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_FIT]);
break;
case SPRN_IVOR12:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG];
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG]);
break;
case SPRN_IVOR13:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS];
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS]);
break;
case SPRN_IVOR14:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS];
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS]);
break;
case SPRN_IVOR15:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG];
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]);
break;
default:
kvmppc_e500_tlb_setup(vcpu_e500);
+ /* Registers init */
+ vcpu->arch.pvr = mfspr(SPRN_PVR);
+
+	/* Since booke kvm only supports one core, update all vcpus' PIR to 0 */
+ vcpu->vcpu_id = 0;
+
return 0;
}
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
int emulated = EMULATE_DONE;
+ ulong spr_val = kvmppc_get_gpr(vcpu, rs);
switch (sprn) {
case SPRN_PID:
vcpu_e500->pid[0] = vcpu->arch.shadow_pid =
- vcpu->arch.pid = vcpu->arch.gpr[rs];
+ vcpu->arch.pid = spr_val;
break;
case SPRN_PID1:
- vcpu_e500->pid[1] = vcpu->arch.gpr[rs]; break;
+ vcpu_e500->pid[1] = spr_val; break;
case SPRN_PID2:
- vcpu_e500->pid[2] = vcpu->arch.gpr[rs]; break;
+ vcpu_e500->pid[2] = spr_val; break;
case SPRN_MAS0:
- vcpu_e500->mas0 = vcpu->arch.gpr[rs]; break;
+ vcpu_e500->mas0 = spr_val; break;
case SPRN_MAS1:
- vcpu_e500->mas1 = vcpu->arch.gpr[rs]; break;
+ vcpu_e500->mas1 = spr_val; break;
case SPRN_MAS2:
- vcpu_e500->mas2 = vcpu->arch.gpr[rs]; break;
+ vcpu_e500->mas2 = spr_val; break;
case SPRN_MAS3:
- vcpu_e500->mas3 = vcpu->arch.gpr[rs]; break;
+ vcpu_e500->mas3 = spr_val; break;
case SPRN_MAS4:
- vcpu_e500->mas4 = vcpu->arch.gpr[rs]; break;
+ vcpu_e500->mas4 = spr_val; break;
case SPRN_MAS6:
- vcpu_e500->mas6 = vcpu->arch.gpr[rs]; break;
+ vcpu_e500->mas6 = spr_val; break;
case SPRN_MAS7:
- vcpu_e500->mas7 = vcpu->arch.gpr[rs]; break;
+ vcpu_e500->mas7 = spr_val; break;
+ case SPRN_L1CSR0:
+ vcpu_e500->l1csr0 = spr_val;
+ vcpu_e500->l1csr0 &= ~(L1CSR0_DCFI | L1CSR0_CLFC);
+ break;
case SPRN_L1CSR1:
- vcpu_e500->l1csr1 = vcpu->arch.gpr[rs]; break;
+ vcpu_e500->l1csr1 = spr_val; break;
case SPRN_HID0:
- vcpu_e500->hid0 = vcpu->arch.gpr[rs]; break;
+ vcpu_e500->hid0 = spr_val; break;
case SPRN_HID1:
- vcpu_e500->hid1 = vcpu->arch.gpr[rs]; break;
+ vcpu_e500->hid1 = spr_val; break;
case SPRN_MMUCSR0:
emulated = kvmppc_e500_emul_mt_mmucsr0(vcpu_e500,
- vcpu->arch.gpr[rs]);
+ spr_val);
break;
/* extra exceptions */
case SPRN_IVOR32:
- vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = vcpu->arch.gpr[rs];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = spr_val;
break;
case SPRN_IVOR33:
- vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA] = vcpu->arch.gpr[rs];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA] = spr_val;
break;
case SPRN_IVOR34:
- vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] = vcpu->arch.gpr[rs];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] = spr_val;
break;
case SPRN_IVOR35:
- vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = vcpu->arch.gpr[rs];
+ vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = spr_val;
break;
default:
switch (sprn) {
case SPRN_PID:
- vcpu->arch.gpr[rt] = vcpu_e500->pid[0]; break;
+ kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[0]); break;
case SPRN_PID1:
- vcpu->arch.gpr[rt] = vcpu_e500->pid[1]; break;
+ kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[1]); break;
case SPRN_PID2:
- vcpu->arch.gpr[rt] = vcpu_e500->pid[2]; break;
+ kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[2]); break;
case SPRN_MAS0:
- vcpu->arch.gpr[rt] = vcpu_e500->mas0; break;
+ kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas0); break;
case SPRN_MAS1:
- vcpu->arch.gpr[rt] = vcpu_e500->mas1; break;
+ kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas1); break;
case SPRN_MAS2:
- vcpu->arch.gpr[rt] = vcpu_e500->mas2; break;
+ kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas2); break;
case SPRN_MAS3:
- vcpu->arch.gpr[rt] = vcpu_e500->mas3; break;
+ kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas3); break;
case SPRN_MAS4:
- vcpu->arch.gpr[rt] = vcpu_e500->mas4; break;
+ kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas4); break;
case SPRN_MAS6:
- vcpu->arch.gpr[rt] = vcpu_e500->mas6; break;
+ kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas6); break;
case SPRN_MAS7:
- vcpu->arch.gpr[rt] = vcpu_e500->mas7; break;
+ kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas7); break;
case SPRN_TLB0CFG:
- vcpu->arch.gpr[rt] = mfspr(SPRN_TLB0CFG);
- vcpu->arch.gpr[rt] &= ~0xfffUL;
- vcpu->arch.gpr[rt] |= vcpu_e500->guest_tlb_size[0];
- break;
-
+ kvmppc_set_gpr(vcpu, rt, vcpu_e500->tlb0cfg); break;
case SPRN_TLB1CFG:
- vcpu->arch.gpr[rt] = mfspr(SPRN_TLB1CFG);
- vcpu->arch.gpr[rt] &= ~0xfffUL;
- vcpu->arch.gpr[rt] |= vcpu_e500->guest_tlb_size[1];
- break;
-
+ kvmppc_set_gpr(vcpu, rt, vcpu_e500->tlb1cfg); break;
+ case SPRN_L1CSR0:
+ kvmppc_set_gpr(vcpu, rt, vcpu_e500->l1csr0); break;
case SPRN_L1CSR1:
- vcpu->arch.gpr[rt] = vcpu_e500->l1csr1; break;
+ kvmppc_set_gpr(vcpu, rt, vcpu_e500->l1csr1); break;
case SPRN_HID0:
- vcpu->arch.gpr[rt] = vcpu_e500->hid0; break;
+ kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid0); break;
case SPRN_HID1:
- vcpu->arch.gpr[rt] = vcpu_e500->hid1; break;
+ kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid1); break;
case SPRN_MMUCSR0:
- vcpu->arch.gpr[rt] = 0; break;
+ kvmppc_set_gpr(vcpu, rt, 0); break;
case SPRN_MMUCFG:
- vcpu->arch.gpr[rt] = mfspr(SPRN_MMUCFG); break;
+ kvmppc_set_gpr(vcpu, rt, mfspr(SPRN_MMUCFG)); break;
/* extra exceptions */
case SPRN_IVOR32:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL];
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]);
break;
case SPRN_IVOR33:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA];
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA]);
break;
case SPRN_IVOR34:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND];
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND]);
break;
case SPRN_IVOR35:
- vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR];
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]);
break;
default:
emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt);
int esel, tlbsel;
gva_t ea;
- ea = ((ra) ? vcpu->arch.gpr[ra] : 0) + vcpu->arch.gpr[rb];
+ ea = ((ra) ? kvmppc_get_gpr(vcpu, ra) : 0) + kvmppc_get_gpr(vcpu, rb);
ia = (ea >> 2) & 0x1;
struct tlbe *gtlbe = NULL;
gva_t ea;
- ea = vcpu->arch.gpr[rb];
+ ea = kvmppc_get_gpr(vcpu, rb);
for (tlbsel = 0; tlbsel < 2; tlbsel++) {
esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as);
if (vcpu_e500->shadow_pages[1] == NULL)
goto err_out_page0;
+ /* Init TLB configuration register */
+ vcpu_e500->tlb0cfg = mfspr(SPRN_TLB0CFG) & ~0xfffUL;
+ vcpu_e500->tlb0cfg |= vcpu_e500->guest_tlb_size[0];
+ vcpu_e500->tlb1cfg = mfspr(SPRN_TLB1CFG) & ~0xfffUL;
+ vcpu_e500->tlb1cfg |= vcpu_e500->guest_tlb_size[1];
+
return 0;
err_out_page0:
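A minimal sketch of the field substitution in the init code above: keep the host's upper TLBnCFG bits but report the guest-visible entry count in the low 12 bits. The host value below is made up for illustration.

#include <stdint.h>
#include <stdio.h>

static uint32_t guest_tlbcfg(uint32_t host_cfg, uint32_t guest_entries)
{
	return (host_cfg & ~0xfffu) | (guest_entries & 0xfffu);
}

int main(void)
{
	printf("0x%08x\n", guest_tlbcfg(0x04010200u, 256));	/* -> 0x04010100 */
	return 0;
}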
pr_debug("mtDEC: %x\n", vcpu->arch.dec);
#ifdef CONFIG_PPC64
+ /* mtdec lowers the interrupt line when positive. */
+ kvmppc_core_dequeue_dec(vcpu);
+
/* POWER4+ triggers a dec interrupt if the value is < 0 */
if (vcpu->arch.dec & 0x80000000) {
hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
pr_debug(KERN_INFO "Emulating opcode %d / %d\n", get_op(inst), get_xop(inst));
+ /* Try again next time */
+ if (inst == KVM_INST_FETCH_FAILED)
+ return EMULATE_DONE;
+
switch (get_op(inst)) {
case OP_TRAP:
#ifdef CONFIG_PPC64
case OP_TRAP_64:
+ kvmppc_core_queue_program(vcpu, SRR1_PROGTRAP);
#else
- vcpu->arch.esr |= ESR_PTR;
+ kvmppc_core_queue_program(vcpu, vcpu->arch.esr | ESR_PTR);
#endif
- kvmppc_core_queue_program(vcpu);
advance = 0;
break;
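Taken together with the low-level exit path earlier (which switches to KVM_GUEST_MODE_SKIP and preloads KVM_INST_FETCH_FAILED before the lwz), the check above means a faulting instruction fetch is simply retried on the next exit. A hedged sketch of that sentinel pattern, with placeholder names and values:

#include <stdint.h>
#include <stdio.h>

#define TOY_INST_FETCH_FAILED	0xffffffffu	/* assumed sentinel value */
#define TOY_EMULATE_DONE	0

static uint32_t toy_fetch(const uint32_t *pc, int load_faults)
{
	uint32_t inst = TOY_INST_FETCH_FAILED;	/* preload the sentinel */
	if (!load_faults)			/* stands in for the skippable lwz */
		inst = *pc;
	return inst;
}

static int toy_emulate(uint32_t inst)
{
	if (inst == TOY_INST_FETCH_FAILED)
		return TOY_EMULATE_DONE;	/* try again on the next exit */
	/* ... decode and emulate ... */
	return TOY_EMULATE_DONE;
}

int main(void)
{
	uint32_t word = 0x38210010;		/* arbitrary instruction word */

	printf("%d\n", toy_emulate(toy_fetch(&word, 1)));	/* faulting fetch */
	printf("%d\n", toy_emulate(toy_fetch(&word, 0)));	/* successful fetch */
	return 0;
}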
case OP_31_XOP_STWX:
rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu,
- vcpu->arch.gpr[rs],
+ kvmppc_get_gpr(vcpu, rs),
4, 1);
break;
case OP_31_XOP_STBX:
rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu,
- vcpu->arch.gpr[rs],
+ kvmppc_get_gpr(vcpu, rs),
1, 1);
break;
ra = get_ra(inst);
rb = get_rb(inst);
- ea = vcpu->arch.gpr[rb];
+ ea = kvmppc_get_gpr(vcpu, rb);
if (ra)
- ea += vcpu->arch.gpr[ra];
+ ea += kvmppc_get_gpr(vcpu, ra);
emulated = kvmppc_handle_store(run, vcpu,
- vcpu->arch.gpr[rs],
+ kvmppc_get_gpr(vcpu, rs),
1, 1);
- vcpu->arch.gpr[rs] = ea;
+ kvmppc_set_gpr(vcpu, rs, ea);
break;
case OP_31_XOP_LHZX:
ra = get_ra(inst);
rb = get_rb(inst);
- ea = vcpu->arch.gpr[rb];
+ ea = kvmppc_get_gpr(vcpu, rb);
if (ra)
- ea += vcpu->arch.gpr[ra];
+ ea += kvmppc_get_gpr(vcpu, ra);
emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
- vcpu->arch.gpr[ra] = ea;
+ kvmppc_set_gpr(vcpu, ra, ea);
break;
case OP_31_XOP_MFSPR:
switch (sprn) {
case SPRN_SRR0:
- vcpu->arch.gpr[rt] = vcpu->arch.srr0; break;
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.srr0); break;
case SPRN_SRR1:
- vcpu->arch.gpr[rt] = vcpu->arch.srr1; break;
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.srr1); break;
case SPRN_PVR:
- vcpu->arch.gpr[rt] = vcpu->arch.pvr; break;
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.pvr); break;
case SPRN_PIR:
- vcpu->arch.gpr[rt] = vcpu->vcpu_id; break;
+ kvmppc_set_gpr(vcpu, rt, vcpu->vcpu_id); break;
case SPRN_MSSSR0:
- vcpu->arch.gpr[rt] = 0; break;
+ kvmppc_set_gpr(vcpu, rt, 0); break;
/* Note: mftb and TBRL/TBWL are user-accessible, so
* the guest can always access the real TB anyways.
* In fact, we probably will never see these traps. */
case SPRN_TBWL:
- vcpu->arch.gpr[rt] = get_tb() >> 32; break;
+ kvmppc_set_gpr(vcpu, rt, get_tb() >> 32); break;
case SPRN_TBWU:
- vcpu->arch.gpr[rt] = get_tb(); break;
+ kvmppc_set_gpr(vcpu, rt, get_tb()); break;
case SPRN_SPRG0:
- vcpu->arch.gpr[rt] = vcpu->arch.sprg0; break;
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg0); break;
case SPRN_SPRG1:
- vcpu->arch.gpr[rt] = vcpu->arch.sprg1; break;
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg1); break;
case SPRN_SPRG2:
- vcpu->arch.gpr[rt] = vcpu->arch.sprg2; break;
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg2); break;
case SPRN_SPRG3:
- vcpu->arch.gpr[rt] = vcpu->arch.sprg3; break;
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg3); break;
/* Note: SPRG4-7 are user-readable, so we don't get
* a trap. */
case SPRN_DEC:
{
u64 jd = get_tb() - vcpu->arch.dec_jiffies;
- vcpu->arch.gpr[rt] = vcpu->arch.dec - jd;
- pr_debug(KERN_INFO "mfDEC: %x - %llx = %lx\n", vcpu->arch.dec, jd, vcpu->arch.gpr[rt]);
+ kvmppc_set_gpr(vcpu, rt, vcpu->arch.dec - jd);
+ pr_debug(KERN_INFO "mfDEC: %x - %llx = %lx\n",
+ vcpu->arch.dec, jd,
+ kvmppc_get_gpr(vcpu, rt));
break;
}
default:
emulated = kvmppc_core_emulate_mfspr(vcpu, sprn, rt);
if (emulated == EMULATE_FAIL) {
printk("mfspr: unknown spr %x\n", sprn);
- vcpu->arch.gpr[rt] = 0;
+ kvmppc_set_gpr(vcpu, rt, 0);
}
break;
}
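The SPRN_DEC read-back above subtracts the timebase ticks elapsed since the decrementer was written. A standalone sketch of that arithmetic; the names and sample numbers are illustrative only.

#include <stdint.h>
#include <stdio.h>

static uint32_t emulated_dec(uint32_t dec_at_write, uint64_t tb_at_write,
			     uint64_t tb_now)
{
	uint64_t elapsed = tb_now - tb_at_write;

	return (uint32_t)(dec_at_write - elapsed);
}

int main(void)
{
	printf("%u\n", emulated_dec(1000000, 5000, 6000));	/* 999000 */
	return 0;
}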
rb = get_rb(inst);
emulated = kvmppc_handle_store(run, vcpu,
- vcpu->arch.gpr[rs],
+ kvmppc_get_gpr(vcpu, rs),
2, 1);
break;
ra = get_ra(inst);
rb = get_rb(inst);
- ea = vcpu->arch.gpr[rb];
+ ea = kvmppc_get_gpr(vcpu, rb);
if (ra)
- ea += vcpu->arch.gpr[ra];
+ ea += kvmppc_get_gpr(vcpu, ra);
emulated = kvmppc_handle_store(run, vcpu,
- vcpu->arch.gpr[rs],
+ kvmppc_get_gpr(vcpu, rs),
2, 1);
- vcpu->arch.gpr[ra] = ea;
+ kvmppc_set_gpr(vcpu, ra, ea);
break;
case OP_31_XOP_MTSPR:
rs = get_rs(inst);
switch (sprn) {
case SPRN_SRR0:
- vcpu->arch.srr0 = vcpu->arch.gpr[rs]; break;
+ vcpu->arch.srr0 = kvmppc_get_gpr(vcpu, rs); break;
case SPRN_SRR1:
- vcpu->arch.srr1 = vcpu->arch.gpr[rs]; break;
+ vcpu->arch.srr1 = kvmppc_get_gpr(vcpu, rs); break;
/* XXX We need to context-switch the timebase for
* watchdog and FIT. */
case SPRN_MSSSR0: break;
case SPRN_DEC:
- vcpu->arch.dec = vcpu->arch.gpr[rs];
+ vcpu->arch.dec = kvmppc_get_gpr(vcpu, rs);
kvmppc_emulate_dec(vcpu);
break;
case SPRN_SPRG0:
- vcpu->arch.sprg0 = vcpu->arch.gpr[rs]; break;
+ vcpu->arch.sprg0 = kvmppc_get_gpr(vcpu, rs); break;
case SPRN_SPRG1:
- vcpu->arch.sprg1 = vcpu->arch.gpr[rs]; break;
+ vcpu->arch.sprg1 = kvmppc_get_gpr(vcpu, rs); break;
case SPRN_SPRG2:
- vcpu->arch.sprg2 = vcpu->arch.gpr[rs]; break;
+ vcpu->arch.sprg2 = kvmppc_get_gpr(vcpu, rs); break;
case SPRN_SPRG3:
- vcpu->arch.sprg3 = vcpu->arch.gpr[rs]; break;
+ vcpu->arch.sprg3 = kvmppc_get_gpr(vcpu, rs); break;
default:
emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, rs);
rb = get_rb(inst);
emulated = kvmppc_handle_store(run, vcpu,
- vcpu->arch.gpr[rs],
+ kvmppc_get_gpr(vcpu, rs),
4, 0);
break;
rb = get_rb(inst);
emulated = kvmppc_handle_store(run, vcpu,
- vcpu->arch.gpr[rs],
+ kvmppc_get_gpr(vcpu, rs),
2, 0);
break;
ra = get_ra(inst);
rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
- vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
break;
case OP_LBZ:
ra = get_ra(inst);
rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
- vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
break;
case OP_STW:
rs = get_rs(inst);
- emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
+ emulated = kvmppc_handle_store(run, vcpu,
+ kvmppc_get_gpr(vcpu, rs),
4, 1);
break;
case OP_STWU:
ra = get_ra(inst);
rs = get_rs(inst);
- emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
+ emulated = kvmppc_handle_store(run, vcpu,
+ kvmppc_get_gpr(vcpu, rs),
4, 1);
- vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
break;
case OP_STB:
rs = get_rs(inst);
- emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
+ emulated = kvmppc_handle_store(run, vcpu,
+ kvmppc_get_gpr(vcpu, rs),
1, 1);
break;
case OP_STBU:
ra = get_ra(inst);
rs = get_rs(inst);
- emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
+ emulated = kvmppc_handle_store(run, vcpu,
+ kvmppc_get_gpr(vcpu, rs),
1, 1);
- vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
break;
case OP_LHZ:
ra = get_ra(inst);
rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
- vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
break;
case OP_STH:
rs = get_rs(inst);
- emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
+ emulated = kvmppc_handle_store(run, vcpu,
+ kvmppc_get_gpr(vcpu, rs),
2, 1);
break;
case OP_STHU:
ra = get_ra(inst);
rs = get_rs(inst);
- emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
+ emulated = kvmppc_handle_store(run, vcpu,
+ kvmppc_get_gpr(vcpu, rs),
2, 1);
- vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
break;
default:
advance = 0;
printk(KERN_ERR "Couldn't emulate instruction 0x%08x "
"(op %d xop %d)\n", inst, get_op(inst), get_xop(inst));
+ kvmppc_core_queue_program(vcpu, 0);
}
}
{
kvmppc_free_vcpus(kvm);
kvm_free_physmem(kvm);
+ cleanup_srcu_struct(&kvm->srcu);
kfree(kvm);
}
return -EINVAL;
}
-int kvm_arch_set_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem,
- struct kvm_memory_slot old,
- int user_alloc)
+int kvm_arch_prepare_memory_region(struct kvm *kvm,
+ struct kvm_memory_slot *memslot,
+ struct kvm_memory_slot old,
+ struct kvm_userspace_memory_region *mem,
+ int user_alloc)
{
return 0;
}
+void kvm_arch_commit_memory_region(struct kvm *kvm,
+ struct kvm_userspace_memory_region *mem,
+ struct kvm_memory_slot old,
+ int user_alloc)
+{
+ return;
+}
+
+
void kvm_arch_flush_shadow(struct kvm *kvm)
{
}
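The kvm_arch_set_memory_region() split above (repeated for s390 further down) separates a fallible prepare step from a non-failing commit step. A toy sketch of that two-phase shape, with placeholder types; the generic code is assumed to call prepare before publishing the new slot and commit afterwards.

#include <stdio.h>

struct toy_slot { unsigned long base_gfn, npages; };

static int toy_prepare_region(const struct toy_slot *new_slot)
{
	if (new_slot->npages == 0)
		return -1;	/* reject before anything is visible to vcpus */
	return 0;
}

static void toy_commit_region(const struct toy_slot *new_slot)
{
	/* must not fail: bring shadow state in line with the published slot */
	printf("mapping %lu pages from gfn %#lx\n",
	       new_slot->npages, new_slot->base_gfn);
}

int main(void)
{
	struct toy_slot s = { .base_gfn = 0x100, .npages = 64 };

	if (toy_prepare_region(&s) == 0)
		toy_commit_region(&s);
	return 0;
}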
static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu,
struct kvm_run *run)
{
- ulong *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
- *gpr = run->dcr.data;
+ kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, run->dcr.data);
}
static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
struct kvm_run *run)
{
- ulong *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
+ ulong gpr;
- if (run->mmio.len > sizeof(*gpr)) {
+ if (run->mmio.len > sizeof(gpr)) {
printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len);
return;
}
if (vcpu->arch.mmio_is_bigendian) {
switch (run->mmio.len) {
- case 4: *gpr = *(u32 *)run->mmio.data; break;
- case 2: *gpr = *(u16 *)run->mmio.data; break;
- case 1: *gpr = *(u8 *)run->mmio.data; break;
+ case 4: gpr = *(u32 *)run->mmio.data; break;
+ case 2: gpr = *(u16 *)run->mmio.data; break;
+ case 1: gpr = *(u8 *)run->mmio.data; break;
}
} else {
/* Convert BE data from userland back to LE. */
switch (run->mmio.len) {
- case 4: *gpr = ld_le32((u32 *)run->mmio.data); break;
- case 2: *gpr = ld_le16((u16 *)run->mmio.data); break;
- case 1: *gpr = *(u8 *)run->mmio.data; break;
+ case 4: gpr = ld_le32((u32 *)run->mmio.data); break;
+ case 2: gpr = ld_le16((u16 *)run->mmio.data); break;
+ case 1: gpr = *(u8 *)run->mmio.data; break;
}
}
+
+ kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr);
}
int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
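A self-contained sketch of the byte-order handling in kvmppc_complete_mmio_load() above: the MMIO payload arrives as raw bytes, and the register value is assembled according to access size and guest endianness. The helper below is an illustration, not the kernel routine.

#include <stdint.h>
#include <stdio.h>

static uint64_t mmio_to_gpr(const uint8_t *buf, int len, int big_endian)
{
	uint64_t val = 0;
	int i;

	for (i = 0; i < len; i++) {
		int shift = big_endian ? (len - 1 - i) * 8 : i * 8;

		val |= (uint64_t)buf[i] << shift;
	}
	return val;
}

int main(void)
{
	uint8_t data[4] = { 0x12, 0x34, 0x56, 0x78 };

	printf("BE: %llx  LE: %llx\n",
	       (unsigned long long)mmio_to_gpr(data, 4, 1),
	       (unsigned long long)mmio_to_gpr(data, 4, 0));
	return 0;
}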
kvm_free_physmem(kvm);
free_page((unsigned long)(kvm->arch.sca));
debug_unregister(kvm->arch.dbf);
+ cleanup_srcu_struct(&kvm->srcu);
kfree(kvm);
}
}
/* Section: memory related */
-int kvm_arch_set_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem,
- struct kvm_memory_slot old,
- int user_alloc)
+int kvm_arch_prepare_memory_region(struct kvm *kvm,
+ struct kvm_memory_slot *memslot,
+ struct kvm_memory_slot old,
+ struct kvm_userspace_memory_region *mem,
+ int user_alloc)
{
- int i;
- struct kvm_vcpu *vcpu;
-
/* A few sanity checks. We can have exactly one memory slot which has
to start at guest virtual zero and which has to be located at a
page boundary in userland and which has to end at a page boundary.
if (!user_alloc)
return -EINVAL;
+ return 0;
+}
+
+void kvm_arch_commit_memory_region(struct kvm *kvm,
+ struct kvm_userspace_memory_region *mem,
+ struct kvm_memory_slot old,
+ int user_alloc)
+{
+ int i;
+ struct kvm_vcpu *vcpu;
+
/* request update of sie control block for all available vcpus */
kvm_for_each_vcpu(i, vcpu, kvm) {
if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
continue;
kvm_s390_inject_sigp_stop(vcpu, ACTION_RELOADVCPU_ON_STOP);
}
-
- return 0;
}
void kvm_arch_flush_shadow(struct kvm *kvm)
static inline void kvm_s390_vcpu_set_mem(struct kvm_vcpu *vcpu)
{
+ int idx;
struct kvm_memory_slot *mem;
+ struct kvm_memslots *memslots;
- down_read(&vcpu->kvm->slots_lock);
- mem = &vcpu->kvm->memslots[0];
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ memslots = rcu_dereference(vcpu->kvm->memslots);
+
+ mem = &memslots->memslots[0];
vcpu->arch.sie_block->gmsor = mem->userspace_addr;
vcpu->arch.sie_block->gmslm =
(mem->npages << PAGE_SHIFT) +
VIRTIODESCSPACE - 1ul;
- up_read(&vcpu->kvm->slots_lock);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
}
/* implemented in priv.c */
header-y += ucontext.h
header-y += processor-flags.h
header-y += hw_breakpoint.h
+header-y += hyperv.h
unifdef-y += e820.h
unifdef-y += ist.h
--- /dev/null
+#ifndef _ASM_X86_KVM_HYPERV_H
+#define _ASM_X86_KVM_HYPERV_H
+
+#include <linux/types.h>
+
+/*
+ * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent
+ * is set by CPUID(HvCpuIdFunctionVersionAndFeatures).
+ */
+#define HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS 0x40000000
+#define HYPERV_CPUID_INTERFACE 0x40000001
+#define HYPERV_CPUID_VERSION 0x40000002
+#define HYPERV_CPUID_FEATURES 0x40000003
+#define HYPERV_CPUID_ENLIGHTMENT_INFO 0x40000004
+#define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005
+
+/*
+ * Feature identification. EAX indicates which features are available
+ * to the partition based upon the current partition privileges.
+ */
+
+/* VP Runtime (HV_X64_MSR_VP_RUNTIME) available */
+#define HV_X64_MSR_VP_RUNTIME_AVAILABLE (1 << 0)
+/* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
+#define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1)
+/*
+ * Basic SynIC MSRs (HV_X64_MSR_SCONTROL through HV_X64_MSR_EOM
+ * and HV_X64_MSR_SINT0 through HV_X64_MSR_SINT15) available
+ */
+#define HV_X64_MSR_SYNIC_AVAILABLE (1 << 2)
+/*
+ * Synthetic Timer MSRs (HV_X64_MSR_STIMER0_CONFIG through
+ * HV_X64_MSR_STIMER3_COUNT) available
+ */
+#define HV_X64_MSR_SYNTIMER_AVAILABLE (1 << 3)
+/*
+ * APIC access MSRs (HV_X64_MSR_EOI, HV_X64_MSR_ICR and HV_X64_MSR_TPR)
+ * are available
+ */
+#define HV_X64_MSR_APIC_ACCESS_AVAILABLE (1 << 4)
+/* Hypercall MSRs (HV_X64_MSR_GUEST_OS_ID and HV_X64_MSR_HYPERCALL) available*/
+#define HV_X64_MSR_HYPERCALL_AVAILABLE (1 << 5)
+/* Access virtual processor index MSR (HV_X64_MSR_VP_INDEX) available*/
+#define HV_X64_MSR_VP_INDEX_AVAILABLE (1 << 6)
+/* Virtual system reset MSR (HV_X64_MSR_RESET) is available*/
+#define HV_X64_MSR_RESET_AVAILABLE (1 << 7)
+ /*
+ * Access statistics pages MSRs (HV_X64_MSR_STATS_PARTITION_RETAIL_PAGE,
+ * HV_X64_MSR_STATS_PARTITION_INTERNAL_PAGE, HV_X64_MSR_STATS_VP_RETAIL_PAGE,
+ * HV_X64_MSR_STATS_VP_INTERNAL_PAGE) available
+ */
+#define HV_X64_MSR_STAT_PAGES_AVAILABLE (1 << 8)
+
+/*
+ * Feature identification: EBX indicates which flags were specified at
+ * partition creation. The format is the same as the partition creation
+ * flag structure defined in section Partition Creation Flags.
+ */
+#define HV_X64_CREATE_PARTITIONS (1 << 0)
+#define HV_X64_ACCESS_PARTITION_ID (1 << 1)
+#define HV_X64_ACCESS_MEMORY_POOL (1 << 2)
+#define HV_X64_ADJUST_MESSAGE_BUFFERS (1 << 3)
+#define HV_X64_POST_MESSAGES (1 << 4)
+#define HV_X64_SIGNAL_EVENTS (1 << 5)
+#define HV_X64_CREATE_PORT (1 << 6)
+#define HV_X64_CONNECT_PORT (1 << 7)
+#define HV_X64_ACCESS_STATS (1 << 8)
+#define HV_X64_DEBUGGING (1 << 11)
+#define HV_X64_CPU_POWER_MANAGEMENT (1 << 12)
+#define HV_X64_CONFIGURE_PROFILER (1 << 13)
+
+/*
+ * Feature identification. EDX indicates which miscellaneous features
+ * are available to the partition.
+ */
+/* The MWAIT instruction is available (per section MONITOR / MWAIT) */
+#define HV_X64_MWAIT_AVAILABLE (1 << 0)
+/* Guest debugging support is available */
+#define HV_X64_GUEST_DEBUGGING_AVAILABLE (1 << 1)
+/* Performance Monitor support is available*/
+#define HV_X64_PERF_MONITOR_AVAILABLE (1 << 2)
+/* Support for physical CPU dynamic partitioning events is available*/
+#define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE (1 << 3)
+/*
+ * Support for passing hypercall input parameter block via XMM
+ * registers is available
+ */
+#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE (1 << 4)
+/* Support for a virtual guest idle state is available */
+#define HV_X64_GUEST_IDLE_STATE_AVAILABLE (1 << 5)
+
+/*
+ * Implementation recommendations. Indicates which behaviors the hypervisor
+ * recommends the OS implement for optimal performance.
+ */
+ /*
+ * Recommend using hypercall for address space switches rather
+ * than MOV to CR3 instruction
+ */
+#define HV_X64_MWAIT_RECOMMENDED (1 << 0)
+/* Recommend using hypercall for local TLB flushes rather
+ * than INVLPG or MOV to CR3 instructions */
+#define HV_X64_LOCAL_TLB_FLUSH_RECOMMENDED (1 << 1)
+/*
+ * Recommend using hypercall for remote TLB flushes rather
+ * than inter-processor interrupts
+ */
+#define HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED (1 << 2)
+/*
+ * Recommend using MSRs for accessing APIC registers
+ * EOI, ICR and TPR rather than their memory-mapped counterparts
+ */
+#define HV_X64_APIC_ACCESS_RECOMMENDED (1 << 3)
+/* Recommend using the hypervisor-provided MSR to initiate a system RESET */
+#define HV_X64_SYSTEM_RESET_RECOMMENDED (1 << 4)
+/*
+ * Recommend using relaxed timing for this partition. If used,
+ * the VM should disable any watchdog timeouts that rely on the
+ * timely delivery of external interrupts
+ */
+#define HV_X64_RELAXED_TIMING_RECOMMENDED (1 << 5)
+
+/* MSR used to identify the guest OS. */
+#define HV_X64_MSR_GUEST_OS_ID 0x40000000
+
+/* MSR used to setup pages used to communicate with the hypervisor. */
+#define HV_X64_MSR_HYPERCALL 0x40000001
+
+/* MSR used to provide vcpu index */
+#define HV_X64_MSR_VP_INDEX 0x40000002
+
+/* Define the virtual APIC registers */
+#define HV_X64_MSR_EOI 0x40000070
+#define HV_X64_MSR_ICR 0x40000071
+#define HV_X64_MSR_TPR 0x40000072
+#define HV_X64_MSR_APIC_ASSIST_PAGE 0x40000073
+
+/* Define synthetic interrupt controller model specific registers. */
+#define HV_X64_MSR_SCONTROL 0x40000080
+#define HV_X64_MSR_SVERSION 0x40000081
+#define HV_X64_MSR_SIEFP 0x40000082
+#define HV_X64_MSR_SIMP 0x40000083
+#define HV_X64_MSR_EOM 0x40000084
+#define HV_X64_MSR_SINT0 0x40000090
+#define HV_X64_MSR_SINT1 0x40000091
+#define HV_X64_MSR_SINT2 0x40000092
+#define HV_X64_MSR_SINT3 0x40000093
+#define HV_X64_MSR_SINT4 0x40000094
+#define HV_X64_MSR_SINT5 0x40000095
+#define HV_X64_MSR_SINT6 0x40000096
+#define HV_X64_MSR_SINT7 0x40000097
+#define HV_X64_MSR_SINT8 0x40000098
+#define HV_X64_MSR_SINT9 0x40000099
+#define HV_X64_MSR_SINT10 0x4000009A
+#define HV_X64_MSR_SINT11 0x4000009B
+#define HV_X64_MSR_SINT12 0x4000009C
+#define HV_X64_MSR_SINT13 0x4000009D
+#define HV_X64_MSR_SINT14 0x4000009E
+#define HV_X64_MSR_SINT15 0x4000009F
+
+
+#define HV_X64_MSR_HYPERCALL_ENABLE 0x00000001
+#define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT 12
+#define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK \
+ (~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1))
+
+/* Declare the various hypercall operations. */
+#define HV_X64_HV_NOTIFY_LONG_SPIN_WAIT 0x0008
+
+#define HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE 0x00000001
+#define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT 12
+#define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \
+ (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
+
+#define HV_PROCESSOR_POWER_STATE_C0 0
+#define HV_PROCESSOR_POWER_STATE_C1 1
+#define HV_PROCESSOR_POWER_STATE_C2 2
+#define HV_PROCESSOR_POWER_STATE_C3 3
+
+/* hypercall status code */
+#define HV_STATUS_SUCCESS 0
+#define HV_STATUS_INVALID_HYPERCALL_CODE 2
+#define HV_STATUS_INVALID_HYPERCALL_INPUT 3
+#define HV_STATUS_INVALID_ALIGNMENT 4
+
+#endif
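As a usage illustration for the definitions above, a small sketch of how HV_X64_MSR_HYPERCALL is commonly decomposed: bit 0 enables the page and the bits above the shift give the guest-physical page address. The constants are restated locally so the example stands alone, and the MSR value is made up.

#include <stdint.h>
#include <stdio.h>

#define TOY_HYPERCALL_ENABLE		0x00000001ull
#define TOY_HYPERCALL_ADDR_SHIFT	12
#define TOY_HYPERCALL_ADDR_MASK	\
	(~((1ull << TOY_HYPERCALL_ADDR_SHIFT) - 1))

int main(void)
{
	uint64_t msr = 0x12345000ull | TOY_HYPERCALL_ENABLE;

	if (msr & TOY_HYPERCALL_ENABLE)
		printf("hypercall page at gpa 0x%llx\n",
		       (unsigned long long)(msr & TOY_HYPERCALL_ADDR_MASK));
	return 0;
}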
struct x86_emulate_ops {
/*
* read_std: Read bytes of standard (non-emulated/special) memory.
- * Used for instruction fetch, stack operations, and others.
+ * Used for descriptor reading.
* @addr: [IN ] Linear address from which to read.
* @val: [OUT] Value read from memory, zero-extended to 'u_long'.
* @bytes: [IN ] Number of bytes to read from memory.
*/
int (*read_std)(unsigned long addr, void *val,
- unsigned int bytes, struct kvm_vcpu *vcpu);
+ unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error);
+
+ /*
+ * fetch: Read bytes of standard (non-emulated/special) memory.
+ * Used for instruction fetch.
+ * @addr: [IN ] Linear address from which to read.
+ * @val: [OUT] Value read from memory, zero-extended to 'u_long'.
+ * @bytes: [IN ] Number of bytes to read from memory.
+ */
+ int (*fetch)(unsigned long addr, void *val,
+ unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error);
/*
* read_emulated: Read bytes from emulated/special memory area.
struct kvm_vcpu *vcpu);
/*
- * write_emulated: Read bytes from emulated/special memory area.
+ * write_emulated: Write bytes to emulated/special memory area.
* @addr: [IN ] Linear address to which to write.
* @val: [IN ] Value to write to memory (low-order bytes used as
* required).
/* Execution mode, passed to the emulator. */
#define X86EMUL_MODE_REAL 0 /* Real mode. */
+#define X86EMUL_MODE_VM86 1 /* Virtual 8086 mode. */
#define X86EMUL_MODE_PROT16 2 /* 16-bit protected mode. */
#define X86EMUL_MODE_PROT32 4 /* 32-bit protected mode. */
#define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */
#include <asm/mtrr.h>
#include <asm/msr-index.h>
-#define KVM_MAX_VCPUS 16
+#define KVM_MAX_VCPUS 64
#define KVM_MEMORY_SLOTS 32
/* memory slots that does not exposed to userspace */
#define KVM_PRIVATE_MEM_SLOTS 4
#define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS | \
0xFFFFFF0000000000ULL)
-#define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \
- (X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD)
-#define KVM_GUEST_CR0_MASK \
- (KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
-#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST \
- (X86_CR0_WP | X86_CR0_NE | X86_CR0_TS | X86_CR0_MP)
-#define KVM_VM_CR0_ALWAYS_ON \
- (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
-#define KVM_GUEST_CR4_MASK \
- (X86_CR4_VME | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_PGE | X86_CR4_VMXE)
-#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
-#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
-
#define INVALID_PAGE (~(hpa_t)0)
#define UNMAPPED_GVA (~(gpa_t)0)
void (*new_cr3)(struct kvm_vcpu *vcpu);
int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err);
void (*free)(struct kvm_vcpu *vcpu);
- gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva);
+ gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access,
+ u32 *error);
void (*prefetch_page)(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *page);
int (*sync_page)(struct kvm_vcpu *vcpu,
u32 regs_dirty;
unsigned long cr0;
+ unsigned long cr0_guest_owned_bits;
unsigned long cr2;
unsigned long cr3;
unsigned long cr4;
+ unsigned long cr4_guest_owned_bits;
unsigned long cr8;
u32 hflags;
u64 pdptrs[4]; /* pae */
- u64 shadow_efer;
+ u64 efer;
u64 apic_base;
struct kvm_lapic *apic; /* kernel irqchip context */
int32_t apic_arb_prio;
/* used for guest single stepping over the given code position */
u16 singlestep_cs;
unsigned long singlestep_rip;
+ /* fields used by HYPER-V emulation */
+ u64 hv_vapic;
};
struct kvm_mem_alias {
gfn_t base_gfn;
unsigned long npages;
gfn_t target_gfn;
+#define KVM_ALIAS_INVALID 1UL
+ unsigned long flags;
};
-struct kvm_arch{
- int naliases;
+#define KVM_ARCH_HAS_UNALIAS_INSTANTIATION
+
+struct kvm_mem_aliases {
struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS];
+ int naliases;
+};
+
+struct kvm_arch {
+ struct kvm_mem_aliases *aliases;
unsigned int n_free_mmu_pages;
unsigned int n_requested_mmu_pages;
s64 kvmclock_offset;
struct kvm_xen_hvm_config xen_hvm_config;
+
+ /* fields used by HYPER-V emulation */
+ u64 hv_guest_os_id;
+ u64 hv_hypercall;
};
struct kvm_vm_stat {
int (*hardware_setup)(void); /* __init */
void (*hardware_unsetup)(void); /* __exit */
bool (*cpu_has_accelerated_tpr)(void);
+ void (*cpuid_update)(struct kvm_vcpu *vcpu);
/* Create, but do not attach this VCPU */
struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id);
void (*set_segment)(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg);
void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
+ void (*decache_cr0_guest_bits)(struct kvm_vcpu *vcpu);
void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu);
void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
void (*set_idt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt);
void (*get_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt);
void (*set_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt);
- unsigned long (*get_dr)(struct kvm_vcpu *vcpu, int dr);
- void (*set_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long value,
- int *exception);
+ int (*get_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long *dest);
+ int (*set_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long value);
void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
+ void (*fpu_activate)(struct kvm_vcpu *vcpu);
+ void (*fpu_deactivate)(struct kvm_vcpu *vcpu);
void (*tlb_flush)(struct kvm_vcpu *vcpu);
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
int (*get_tdp_level)(void);
u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
- bool (*gb_page_enable)(void);
+ int (*get_lpage_level)(void);
+ bool (*rdtscp_supported)(void);
const struct trace_print_flags *exit_reasons_str;
};
unsigned long value);
void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
-int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
- int type_bits, int seg);
+int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg);
int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason);
int kvm_mmu_load(struct kvm_vcpu *vcpu);
void kvm_mmu_unload(struct kvm_vcpu *vcpu);
void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
+gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error);
+gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, u32 *error);
+gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, u32 *error);
+gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, u32 *error);
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
int complete_pio(struct kvm_vcpu *vcpu);
+bool kvm_check_iopl(struct kvm_vcpu *vcpu);
struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn);
#define _ASM_X86_KVM_PARA_H
#include <linux/types.h>
+#include <asm/hyperv.h>
/* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx. It
* should be used to determine that a VM is running under KVM.
#define SVM_EXIT_ERR -1
-#define SVM_CR0_SELECTIVE_MASK (1 << 3 | 1) /* TS and MP */
+#define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP)
#define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda"
#define SVM_VMRUN ".byte 0x0f, 0x01, 0xd8"
*/
#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
#define SECONDARY_EXEC_ENABLE_EPT 0x00000002
+#define SECONDARY_EXEC_RDTSCP 0x00000008
#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080
#define EXIT_REASON_MSR_READ 31
#define EXIT_REASON_MSR_WRITE 32
#define EXIT_REASON_MWAIT_INSTRUCTION 36
+#define EXIT_REASON_MONITOR_INSTRUCTION 39
#define EXIT_REASON_PAUSE_INSTRUCTION 40
#define EXIT_REASON_MCE_DURING_VMENTRY 41
#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
#define VMX_EPTP_UC_BIT (1ull << 8)
#define VMX_EPTP_WB_BIT (1ull << 14)
#define VMX_EPT_2MB_PAGE_BIT (1ull << 16)
+#define VMX_EPT_1GB_PAGE_BIT (1ull << 17)
#define VMX_EPT_EXTENT_INDIVIDUAL_BIT (1ull << 24)
#define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25)
#define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26)
#define VMX_EPT_READABLE_MASK 0x1ull
#define VMX_EPT_WRITABLE_MASK 0x2ull
#define VMX_EPT_EXECUTABLE_MASK 0x4ull
-#define VMX_EPT_IGMT_BIT (1ull << 6)
+#define VMX_EPT_IPAT_BIT (1ull << 6)
#define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul
register_sysctl_table(kernel_root_table2);
#endif
on_each_cpu(cpu_vsyscall_init, NULL, 1);
- hotcpu_notifier(cpu_vsyscall_notifier, 0);
+ /* notifier priority > KVM */
+ hotcpu_notifier(cpu_vsyscall_notifier, 30);
return 0;
}
select HAVE_KVM_EVENTFD
select KVM_APIC_ARCHITECTURE
select USER_RETURN_NOTIFIER
+ select KVM_MMIO
---help---
Support hosting fully virtualized guest machines using hardware
virtualization extensions. You will need a fairly recent
#include <linux/module.h>
#include <asm/kvm_emulate.h>
-#include "mmu.h" /* for is_long_mode() */
+#include "x86.h"
/*
* Opcode effective-address decode tables.
#define GroupDual (1<<15) /* Alternate decoding of mod == 3 */
#define GroupMask 0xff /* Group number stored in bits 0:7 */
/* Misc flags */
+#define Lock (1<<26) /* lock prefix is allowed for the instruction */
+#define Priv (1<<27) /* instruction generates #GP if current CPL != 0 */
#define No64 (1<<28)
/* Source 2 operand type */
#define Src2None (0<<29)
enum {
Group1_80, Group1_81, Group1_82, Group1_83,
Group1A, Group3_Byte, Group3, Group4, Group5, Group7,
+ Group8, Group9,
};
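The Lock and Priv decode flags added above are consulted before an instruction is executed; the exact checks live outside this hunk, so the following is only a hedged sketch of the idea, with local stand-in names.

#include <stdbool.h>
#include <stdio.h>

#define TOY_LOCK	(1u << 26)	/* LOCK prefix allowed */
#define TOY_PRIV	(1u << 27)	/* #GP unless CPL == 0 */

/* 0 on success, -1 if the instruction must fault instead of executing */
static int toy_check_flags(unsigned int d, bool has_lock_prefix, int cpl)
{
	if (has_lock_prefix && !(d & TOY_LOCK))
		return -1;		/* LOCK on a non-lockable instruction */
	if ((d & TOY_PRIV) && cpl != 0)
		return -1;		/* privileged instruction outside ring 0 */
	return 0;
}

int main(void)
{
	printf("%d\n", toy_check_flags(TOY_PRIV, false, 3));	/* -> -1 */
	printf("%d\n", toy_check_flags(TOY_LOCK, true, 3));	/* -> 0 */
	return 0;
}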
static u32 opcode_table[256] = {
/* 0x00 - 0x07 */
- ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
+ ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
/* 0x08 - 0x0F */
- ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
+ ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
ImplicitOps | Stack | No64, 0,
/* 0x10 - 0x17 */
- ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
+ ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
/* 0x18 - 0x1F */
- ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
+ ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
/* 0x20 - 0x27 */
- ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
+ ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0,
/* 0x28 - 0x2F */
- ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
+ ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
0, 0, 0, 0,
/* 0x30 - 0x37 */
- ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
+ ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
0, 0, 0, 0,
/* 0x38 - 0x3F */
Group | Group1_80, Group | Group1_81,
Group | Group1_82, Group | Group1_83,
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
- ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
+ ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
/* 0x88 - 0x8F */
ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov,
ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
/* 0xF0 - 0xF7 */
0, 0, 0, 0,
- ImplicitOps, ImplicitOps, Group | Group3_Byte, Group | Group3,
+ ImplicitOps | Priv, ImplicitOps, Group | Group3_Byte, Group | Group3,
/* 0xF8 - 0xFF */
ImplicitOps, 0, ImplicitOps, ImplicitOps,
ImplicitOps, ImplicitOps, Group | Group4, Group | Group5,
static u32 twobyte_table[256] = {
/* 0x00 - 0x0F */
- 0, Group | GroupDual | Group7, 0, 0, 0, ImplicitOps, ImplicitOps, 0,
- ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0,
+ 0, Group | GroupDual | Group7, 0, 0,
+ 0, ImplicitOps, ImplicitOps | Priv, 0,
+ ImplicitOps | Priv, ImplicitOps | Priv, 0, 0,
+ 0, ImplicitOps | ModRM, 0, 0,
/* 0x10 - 0x1F */
0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0,
/* 0x20 - 0x2F */
- ModRM | ImplicitOps, ModRM, ModRM | ImplicitOps, ModRM, 0, 0, 0, 0,
+ ModRM | ImplicitOps | Priv, ModRM | Priv,
+ ModRM | ImplicitOps | Priv, ModRM | Priv,
+ 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
/* 0x30 - 0x3F */
- ImplicitOps, 0, ImplicitOps, 0,
- ImplicitOps, ImplicitOps, 0, 0,
+ ImplicitOps | Priv, 0, ImplicitOps | Priv, 0,
+ ImplicitOps, ImplicitOps | Priv, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
/* 0x40 - 0x47 */
DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
DstMem | SrcReg | Src2CL | ModRM, 0, 0,
/* 0xA8 - 0xAF */
ImplicitOps | Stack, ImplicitOps | Stack,
- 0, DstMem | SrcReg | ModRM | BitOp,
+ 0, DstMem | SrcReg | ModRM | BitOp | Lock,
DstMem | SrcReg | Src2ImmByte | ModRM,
DstMem | SrcReg | Src2CL | ModRM,
ModRM, 0,
/* 0xB0 - 0xB7 */
- ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0,
- DstMem | SrcReg | ModRM | BitOp,
+ ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
+ 0, DstMem | SrcReg | ModRM | BitOp | Lock,
0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
DstReg | SrcMem16 | ModRM | Mov,
/* 0xB8 - 0xBF */
- 0, 0, DstMem | SrcImmByte | ModRM, DstMem | SrcReg | ModRM | BitOp,
+ 0, 0,
+ Group | Group8, DstMem | SrcReg | ModRM | BitOp | Lock,
0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
DstReg | SrcMem16 | ModRM | Mov,
/* 0xC0 - 0xCF */
- 0, 0, 0, DstMem | SrcReg | ModRM | Mov, 0, 0, 0, ImplicitOps | ModRM,
+ 0, 0, 0, DstMem | SrcReg | ModRM | Mov,
+ 0, 0, 0, Group | GroupDual | Group9,
0, 0, 0, 0, 0, 0, 0, 0,
/* 0xD0 - 0xDF */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
static u32 group_table[] = {
[Group1_80*8] =
- ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
- ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
- ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
- ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
+ ByteOp | DstMem | SrcImm | ModRM | Lock,
+ ByteOp | DstMem | SrcImm | ModRM | Lock,
+ ByteOp | DstMem | SrcImm | ModRM | Lock,
+ ByteOp | DstMem | SrcImm | ModRM | Lock,
+ ByteOp | DstMem | SrcImm | ModRM | Lock,
+ ByteOp | DstMem | SrcImm | ModRM | Lock,
+ ByteOp | DstMem | SrcImm | ModRM | Lock,
+ ByteOp | DstMem | SrcImm | ModRM,
[Group1_81*8] =
- DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM,
- DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM,
- DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM,
- DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM,
+ DstMem | SrcImm | ModRM | Lock,
+ DstMem | SrcImm | ModRM | Lock,
+ DstMem | SrcImm | ModRM | Lock,
+ DstMem | SrcImm | ModRM | Lock,
+ DstMem | SrcImm | ModRM | Lock,
+ DstMem | SrcImm | ModRM | Lock,
+ DstMem | SrcImm | ModRM | Lock,
+ DstMem | SrcImm | ModRM,
[Group1_82*8] =
- ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
- ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
- ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
- ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
+ ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
+ ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
+ ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
+ ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
+ ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
+ ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
+ ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
+ ByteOp | DstMem | SrcImm | ModRM | No64,
[Group1_83*8] =
- DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM,
- DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM,
- DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM,
- DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM,
+ DstMem | SrcImmByte | ModRM | Lock,
+ DstMem | SrcImmByte | ModRM | Lock,
+ DstMem | SrcImmByte | ModRM | Lock,
+ DstMem | SrcImmByte | ModRM | Lock,
+ DstMem | SrcImmByte | ModRM | Lock,
+ DstMem | SrcImmByte | ModRM | Lock,
+ DstMem | SrcImmByte | ModRM | Lock,
+ DstMem | SrcImmByte | ModRM,
[Group1A*8] =
DstMem | SrcNone | ModRM | Mov | Stack, 0, 0, 0, 0, 0, 0, 0,
[Group3_Byte*8] =
SrcMem | ModRM | Stack, 0,
SrcMem | ModRM | Stack, 0, SrcMem | ModRM | Stack, 0,
[Group7*8] =
- 0, 0, ModRM | SrcMem, ModRM | SrcMem,
+ 0, 0, ModRM | SrcMem | Priv, ModRM | SrcMem | Priv,
SrcNone | ModRM | DstMem | Mov, 0,
- SrcMem16 | ModRM | Mov, SrcMem | ModRM | ByteOp,
+ SrcMem16 | ModRM | Mov | Priv, SrcMem | ModRM | ByteOp | Priv,
+ [Group8*8] =
+ 0, 0, 0, 0,
+ DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM | Lock,
+ DstMem | SrcImmByte | ModRM | Lock, DstMem | SrcImmByte | ModRM | Lock,
+ [Group9*8] =
+ 0, ImplicitOps | ModRM | Lock, 0, 0, 0, 0, 0, 0,
};
static u32 group2_table[] = {
[Group7*8] =
- SrcNone | ModRM, 0, 0, SrcNone | ModRM,
+ SrcNone | ModRM | Priv, 0, 0, SrcNone | ModRM,
SrcNone | ModRM | DstMem | Mov, 0,
SrcMem16 | ModRM | Mov, 0,
+ [Group9*8] =
+ 0, 0, 0, 0, 0, 0, 0, 0,
};
/* EFLAGS bit definitions. */
+#define EFLG_ID (1<<21)
+#define EFLG_VIP (1<<20)
+#define EFLG_VIF (1<<19)
+#define EFLG_AC (1<<18)
#define EFLG_VM (1<<17)
#define EFLG_RF (1<<16)
+#define EFLG_IOPL (3<<12)
+#define EFLG_NT (1<<14)
#define EFLG_OF (1<<11)
#define EFLG_DF (1<<10)
#define EFLG_IF (1<<9)
+#define EFLG_TF (1<<8)
#define EFLG_SF (1<<7)
#define EFLG_ZF (1<<6)
#define EFLG_AF (1<<4)
if (linear < fc->start || linear >= fc->end) {
size = min(15UL, PAGE_SIZE - offset_in_page(linear));
- rc = ops->read_std(linear, fc->data, size, ctxt->vcpu);
+ rc = ops->fetch(linear, fc->data, size, ctxt->vcpu, NULL);
if (rc)
return rc;
fc->start = linear;
op_bytes = 3;
*address = 0;
rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2,
- ctxt->vcpu);
+ ctxt->vcpu, NULL);
if (rc)
return rc;
rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes,
- ctxt->vcpu);
+ ctxt->vcpu, NULL);
return rc;
}
switch (mode) {
case X86EMUL_MODE_REAL:
+ case X86EMUL_MODE_VM86:
case X86EMUL_MODE_PROT16:
def_op_bytes = def_ad_bytes = 2;
break;
}
if (mode == X86EMUL_MODE_PROT64 && (c->d & No64)) {
- kvm_report_emulation_failure(ctxt->vcpu, "invalid x86/64 instruction");;
+ kvm_report_emulation_failure(ctxt->vcpu, "invalid x86/64 instruction");
return -1;
}
rc = ops->read_emulated(register_address(c, ss_base(ctxt),
c->regs[VCPU_REGS_RSP]),
dest, len, ctxt->vcpu);
- if (rc != 0)
+ if (rc != X86EMUL_CONTINUE)
return rc;
register_address_increment(c, &c->regs[VCPU_REGS_RSP], len);
return rc;
}
+static int emulate_popf(struct x86_emulate_ctxt *ctxt,
+ struct x86_emulate_ops *ops,
+ void *dest, int len)
+{
+ int rc;
+ unsigned long val, change_mask;
+ int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
+ int cpl = kvm_x86_ops->get_cpl(ctxt->vcpu);
+
+ rc = emulate_pop(ctxt, ops, &val, len);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+
+ change_mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_OF
+ | EFLG_TF | EFLG_DF | EFLG_NT | EFLG_RF | EFLG_AC | EFLG_ID;
+
+ switch(ctxt->mode) {
+ case X86EMUL_MODE_PROT64:
+ case X86EMUL_MODE_PROT32:
+ case X86EMUL_MODE_PROT16:
+ if (cpl == 0)
+ change_mask |= EFLG_IOPL;
+ if (cpl <= iopl)
+ change_mask |= EFLG_IF;
+ break;
+ case X86EMUL_MODE_VM86:
+ if (iopl < 3) {
+ kvm_inject_gp(ctxt->vcpu, 0);
+ return X86EMUL_PROPAGATE_FAULT;
+ }
+ change_mask |= EFLG_IF;
+ break;
+ default: /* real mode */
+ change_mask |= (EFLG_IOPL | EFLG_IF);
+ break;
+ }
+
+ *(unsigned long *)dest =
+ (ctxt->eflags & ~change_mask) | (val & change_mask);
+
+ return rc;
+}
+
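The new emulate_popf() above enforces the architectural rules for which EFLAGS bits a POPF may change. As a minimal user-space sketch of those CPL/IOPL rules (illustrative only, using the architectural bit positions; not taken from the patch):

#include <stdio.h>

#define EFLG_IF   (1u << 9)
#define EFLG_IOPL (3u << 12)

/* which of IF/IOPL a protected-mode POPF may actually modify */
static unsigned int popf_writable(int cpl, int iopl)
{
	unsigned int mask = 0;		/* arithmetic/control flags omitted */

	if (cpl == 0)
		mask |= EFLG_IOPL;	/* only ring 0 may change IOPL */
	if (cpl <= iopl)
		mask |= EFLG_IF;	/* IF is writable only when CPL <= IOPL */
	return mask;
}

int main(void)
{
	printf("CPL=3 IOPL=0 -> %#x\n", popf_writable(3, 0));	/* neither bit */
	printf("CPL=0 IOPL=3 -> %#x\n", popf_writable(0, 3));	/* both bits */
	return 0;
}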
static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, int seg)
{
struct decode_cache *c = &ctxt->decode;
if (rc != 0)
return rc;
- rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)selector, 1, seg);
+ rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)selector, seg);
return rc;
}
int rc;
rc = ops->read_emulated(memop, &old, 8, ctxt->vcpu);
- if (rc != 0)
+ if (rc != X86EMUL_CONTINUE)
return rc;
if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) ||
(u32) c->regs[VCPU_REGS_RBX];
rc = ops->cmpxchg_emulated(memop, &old, &new, 8, ctxt->vcpu);
- if (rc != 0)
+ if (rc != X86EMUL_CONTINUE)
return rc;
ctxt->eflags |= EFLG_ZF;
}
rc = emulate_pop(ctxt, ops, &cs, c->op_bytes);
if (rc)
return rc;
- rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)cs, 1, VCPU_SREG_CS);
+ rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)cs, VCPU_SREG_CS);
return rc;
}
&c->dst.val,
c->dst.bytes,
ctxt->vcpu);
- if (rc != 0)
+ if (rc != X86EMUL_CONTINUE)
return rc;
break;
case OP_NONE:
u64 msr_data;
/* syscall is not available in real mode */
- if (c->lock_prefix || ctxt->mode == X86EMUL_MODE_REAL
- || !(ctxt->vcpu->arch.cr0 & X86_CR0_PE))
- return -1;
+ if (ctxt->mode == X86EMUL_MODE_REAL || ctxt->mode == X86EMUL_MODE_VM86)
+ return X86EMUL_UNHANDLEABLE;
setup_syscalls_segments(ctxt, &cs, &ss);
ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
}
- return 0;
+ return X86EMUL_CONTINUE;
}
static int
struct kvm_segment cs, ss;
u64 msr_data;
- /* inject #UD if LOCK prefix is used */
- if (c->lock_prefix)
- return -1;
-
- /* inject #GP if in real mode or paging is disabled */
- if (ctxt->mode == X86EMUL_MODE_REAL ||
- !(ctxt->vcpu->arch.cr0 & X86_CR0_PE)) {
+ /* inject #GP if in real mode */
+ if (ctxt->mode == X86EMUL_MODE_REAL) {
kvm_inject_gp(ctxt->vcpu, 0);
- return -1;
+ return X86EMUL_UNHANDLEABLE;
}
/* XXX sysenter/sysexit have not been tested in 64bit mode.
* Therefore, we inject an #UD.
*/
if (ctxt->mode == X86EMUL_MODE_PROT64)
- return -1;
+ return X86EMUL_UNHANDLEABLE;
setup_syscalls_segments(ctxt, &cs, &ss);
case X86EMUL_MODE_PROT32:
if ((msr_data & 0xfffc) == 0x0) {
kvm_inject_gp(ctxt->vcpu, 0);
- return -1;
+ return X86EMUL_PROPAGATE_FAULT;
}
break;
case X86EMUL_MODE_PROT64:
if (msr_data == 0x0) {
kvm_inject_gp(ctxt->vcpu, 0);
- return -1;
+ return X86EMUL_PROPAGATE_FAULT;
}
break;
}
kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data);
c->regs[VCPU_REGS_RSP] = msr_data;
- return 0;
+ return X86EMUL_CONTINUE;
}
static int
u64 msr_data;
int usermode;
- /* inject #UD if LOCK prefix is used */
- if (c->lock_prefix)
- return -1;
-
- /* inject #GP if in real mode or paging is disabled */
- if (ctxt->mode == X86EMUL_MODE_REAL
- || !(ctxt->vcpu->arch.cr0 & X86_CR0_PE)) {
- kvm_inject_gp(ctxt->vcpu, 0);
- return -1;
- }
-
- /* sysexit must be called from CPL 0 */
- if (kvm_x86_ops->get_cpl(ctxt->vcpu) != 0) {
+ /* inject #GP if in real mode or Virtual 8086 mode */
+ if (ctxt->mode == X86EMUL_MODE_REAL ||
+ ctxt->mode == X86EMUL_MODE_VM86) {
kvm_inject_gp(ctxt->vcpu, 0);
- return -1;
+ return X86EMUL_UNHANDLEABLE;
}
setup_syscalls_segments(ctxt, &cs, &ss);
cs.selector = (u16)(msr_data + 16);
if ((msr_data & 0xfffc) == 0x0) {
kvm_inject_gp(ctxt->vcpu, 0);
- return -1;
+ return X86EMUL_PROPAGATE_FAULT;
}
ss.selector = (u16)(msr_data + 24);
break;
cs.selector = (u16)(msr_data + 32);
if (msr_data == 0x0) {
kvm_inject_gp(ctxt->vcpu, 0);
- return -1;
+ return X86EMUL_PROPAGATE_FAULT;
}
ss.selector = cs.selector + 8;
cs.db = 0;
c->eip = ctxt->vcpu->arch.regs[VCPU_REGS_RDX];
c->regs[VCPU_REGS_RSP] = ctxt->vcpu->arch.regs[VCPU_REGS_RCX];
- return 0;
+ return X86EMUL_CONTINUE;
+}
+
+static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt)
+{
+ int iopl;
+ if (ctxt->mode == X86EMUL_MODE_REAL)
+ return false;
+ if (ctxt->mode == X86EMUL_MODE_VM86)
+ return true;
+ iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
+ return kvm_x86_ops->get_cpl(ctxt->vcpu) > iopl;
+}
+
+static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
+ struct x86_emulate_ops *ops,
+ u16 port, u16 len)
+{
+ struct kvm_segment tr_seg;
+ int r;
+ u16 io_bitmap_ptr;
+ u8 perm, bit_idx = port & 0x7;
+ unsigned mask = (1 << len) - 1;
+
+ kvm_get_segment(ctxt->vcpu, &tr_seg, VCPU_SREG_TR);
+ if (tr_seg.unusable)
+ return false;
+ if (tr_seg.limit < 103)
+ return false;
+ r = ops->read_std(tr_seg.base + 102, &io_bitmap_ptr, 2, ctxt->vcpu,
+ NULL);
+ if (r != X86EMUL_CONTINUE)
+ return false;
+ if (io_bitmap_ptr + port/8 > tr_seg.limit)
+ return false;
+ r = ops->read_std(tr_seg.base + io_bitmap_ptr + port/8, &perm, 1,
+ ctxt->vcpu, NULL);
+ if (r != X86EMUL_CONTINUE)
+ return false;
+ if ((perm >> bit_idx) & mask)
+ return false;
+ return true;
+}
+
+static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt,
+ struct x86_emulate_ops *ops,
+ u16 port, u16 len)
+{
+ if (emulator_bad_iopl(ctxt))
+ if (!emulator_io_port_access_allowed(ctxt, ops, port, len))
+ return false;
+ return true;
}
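The check in emulator_io_port_access_allowed() above is the usual TSS I/O permission bitmap lookup. A minimal stand-alone sketch of that lookup, assuming the relevant bitmap byte has already been fetched into a local buffer (illustrative only, not part of the patch):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* true if an access of 'len' bytes to 'port' is permitted by the bitmap */
static bool io_allowed(const uint8_t *bitmap, uint16_t port, unsigned int len)
{
	unsigned int mask = (1u << len) - 1;	/* one bit per accessed byte */

	/* the access is allowed only if every covered bit is clear */
	return ((bitmap[port / 8] >> (port & 7)) & mask) == 0;
}

int main(void)
{
	uint8_t bitmap[8192] = { 0 };

	bitmap[0x70 / 8] |= 1 << (0x70 & 7);	/* deny port 0x70 */
	printf("port 0x70: %d, port 0x71: %d\n",
	       io_allowed(bitmap, 0x70, 1), io_allowed(bitmap, 0x71, 1));
	return 0;
}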
int
memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);
saved_eip = c->eip;
+ /* LOCK prefix is allowed only with some instructions */
+ if (c->lock_prefix && !(c->d & Lock)) {
+ kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
+ goto done;
+ }
+
+ /* Privileged instruction can be executed only in CPL=0 */
+ if ((c->d & Priv) && kvm_x86_ops->get_cpl(ctxt->vcpu)) {
+ kvm_inject_gp(ctxt->vcpu, 0);
+ goto done;
+ }
+
if (((c->d & ModRM) && (c->modrm_mod != 3)) || (c->d & MemAbs))
memop = c->modrm_ea;
&c->src.val,
c->src.bytes,
ctxt->vcpu);
- if (rc != 0)
+ if (rc != X86EMUL_CONTINUE)
goto done;
c->src.orig_val = c->src.val;
}
c->dst.ptr = (void *)c->dst.ptr +
(c->src.val & mask) / 8;
}
- if (!(c->d & Mov) &&
- /* optimisation - avoid slow emulated read */
- ((rc = ops->read_emulated((unsigned long)c->dst.ptr,
- &c->dst.val,
- c->dst.bytes, ctxt->vcpu)) != 0))
- goto done;
+ if (!(c->d & Mov)) {
+ /* optimisation - avoid slow emulated read */
+ rc = ops->read_emulated((unsigned long)c->dst.ptr,
+ &c->dst.val,
+ c->dst.bytes,
+ ctxt->vcpu);
+ if (rc != X86EMUL_CONTINUE)
+ goto done;
+ }
}
c->dst.orig_val = c->dst.val;
break;
case 0x6c: /* insb */
case 0x6d: /* insw/insd */
- if (kvm_emulate_pio_string(ctxt->vcpu,
+ if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX],
+ (c->d & ByteOp) ? 1 : c->op_bytes)) {
+ kvm_inject_gp(ctxt->vcpu, 0);
+ goto done;
+ }
+ if (kvm_emulate_pio_string(ctxt->vcpu,
1,
(c->d & ByteOp) ? 1 : c->op_bytes,
c->rep_prefix ?
return 0;
case 0x6e: /* outsb */
case 0x6f: /* outsw/outsd */
+ if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX],
+ (c->d & ByteOp) ? 1 : c->op_bytes)) {
+ kvm_inject_gp(ctxt->vcpu, 0);
+ goto done;
+ }
if (kvm_emulate_pio_string(ctxt->vcpu,
0,
(c->d & ByteOp) ? 1 : c->op_bytes,
break;
case 0x8e: { /* mov seg, r/m16 */
uint16_t sel;
- int type_bits;
- int err;
sel = c->src.val;
- if (c->modrm_reg == VCPU_SREG_SS)
- toggle_interruptibility(ctxt, X86_SHADOW_INT_MOV_SS);
- if (c->modrm_reg <= 5) {
- type_bits = (c->modrm_reg == 1) ? 9 : 1;
- err = kvm_load_segment_descriptor(ctxt->vcpu, sel,
- type_bits, c->modrm_reg);
- } else {
- printk(KERN_INFO "Invalid segreg in modrm byte 0x%02x\n",
- c->modrm);
- goto cannot_emulate;
+ if (c->modrm_reg == VCPU_SREG_CS ||
+ c->modrm_reg > VCPU_SREG_GS) {
+ kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
+ goto done;
}
- if (err < 0)
- goto cannot_emulate;
+ if (c->modrm_reg == VCPU_SREG_SS)
+ toggle_interruptibility(ctxt, X86_SHADOW_INT_MOV_SS);
+
+ rc = kvm_load_segment_descriptor(ctxt->vcpu, sel, c->modrm_reg);
c->dst.type = OP_NONE; /* Disable writeback. */
break;
c->dst.type = OP_REG;
c->dst.ptr = (unsigned long *) &ctxt->eflags;
c->dst.bytes = c->op_bytes;
- goto pop_instruction;
+ rc = emulate_popf(ctxt, ops, &c->dst.val, c->op_bytes);
+ if (rc != X86EMUL_CONTINUE)
+ goto done;
+ break;
case 0xa0 ... 0xa1: /* mov */
c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
c->dst.val = c->src.val;
c->dst.ptr = (unsigned long *)register_address(c,
es_base(ctxt),
c->regs[VCPU_REGS_RDI]);
- if ((rc = ops->read_emulated(register_address(c,
- seg_override_base(ctxt, c),
- c->regs[VCPU_REGS_RSI]),
+ rc = ops->read_emulated(register_address(c,
+ seg_override_base(ctxt, c),
+ c->regs[VCPU_REGS_RSI]),
&c->dst.val,
- c->dst.bytes, ctxt->vcpu)) != 0)
+ c->dst.bytes, ctxt->vcpu);
+ if (rc != X86EMUL_CONTINUE)
goto done;
register_address_increment(c, &c->regs[VCPU_REGS_RSI],
(ctxt->eflags & EFLG_DF) ? -c->dst.bytes
c->src.ptr = (unsigned long *)register_address(c,
seg_override_base(ctxt, c),
c->regs[VCPU_REGS_RSI]);
- if ((rc = ops->read_emulated((unsigned long)c->src.ptr,
- &c->src.val,
- c->src.bytes,
- ctxt->vcpu)) != 0)
+ rc = ops->read_emulated((unsigned long)c->src.ptr,
+ &c->src.val,
+ c->src.bytes,
+ ctxt->vcpu);
+ if (rc != X86EMUL_CONTINUE)
goto done;
c->dst.type = OP_NONE; /* Disable writeback. */
c->dst.ptr = (unsigned long *)register_address(c,
es_base(ctxt),
c->regs[VCPU_REGS_RDI]);
- if ((rc = ops->read_emulated((unsigned long)c->dst.ptr,
- &c->dst.val,
- c->dst.bytes,
- ctxt->vcpu)) != 0)
+ rc = ops->read_emulated((unsigned long)c->dst.ptr,
+ &c->dst.val,
+ c->dst.bytes,
+ ctxt->vcpu);
+ if (rc != X86EMUL_CONTINUE)
goto done;
DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr);
c->dst.type = OP_REG;
c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
- if ((rc = ops->read_emulated(register_address(c,
- seg_override_base(ctxt, c),
- c->regs[VCPU_REGS_RSI]),
- &c->dst.val,
- c->dst.bytes,
- ctxt->vcpu)) != 0)
+ rc = ops->read_emulated(register_address(c,
+ seg_override_base(ctxt, c),
+ c->regs[VCPU_REGS_RSI]),
+ &c->dst.val,
+ c->dst.bytes,
+ ctxt->vcpu);
+ if (rc != X86EMUL_CONTINUE)
goto done;
register_address_increment(c, &c->regs[VCPU_REGS_RSI],
(ctxt->eflags & EFLG_DF) ? -c->dst.bytes
case 0xe9: /* jmp rel */
goto jmp;
case 0xea: /* jmp far */
- if (kvm_load_segment_descriptor(ctxt->vcpu, c->src2.val, 9,
- VCPU_SREG_CS) < 0) {
- DPRINTF("jmp far: Failed to load CS descriptor\n");
- goto cannot_emulate;
- }
+ if (kvm_load_segment_descriptor(ctxt->vcpu, c->src2.val,
+ VCPU_SREG_CS))
+ goto done;
c->eip = c->src.val;
break;
case 0xef: /* out (e/r)ax,dx */
port = c->regs[VCPU_REGS_RDX];
io_dir_in = 0;
- do_io: if (kvm_emulate_pio(ctxt->vcpu, io_dir_in,
+ do_io:
+ if (!emulator_io_permited(ctxt, ops, port,
+ (c->d & ByteOp) ? 1 : c->op_bytes)) {
+ kvm_inject_gp(ctxt->vcpu, 0);
+ goto done;
+ }
+ if (kvm_emulate_pio(ctxt->vcpu, io_dir_in,
(c->d & ByteOp) ? 1 : c->op_bytes,
port) != 0) {
c->eip = saved_eip;
c->dst.type = OP_NONE; /* Disable writeback. */
break;
case 0xfa: /* cli */
- ctxt->eflags &= ~X86_EFLAGS_IF;
- c->dst.type = OP_NONE; /* Disable writeback. */
+ if (emulator_bad_iopl(ctxt))
+ kvm_inject_gp(ctxt->vcpu, 0);
+ else {
+ ctxt->eflags &= ~X86_EFLAGS_IF;
+ c->dst.type = OP_NONE; /* Disable writeback. */
+ }
break;
case 0xfb: /* sti */
- toggle_interruptibility(ctxt, X86_SHADOW_INT_STI);
- ctxt->eflags |= X86_EFLAGS_IF;
- c->dst.type = OP_NONE; /* Disable writeback. */
+ if (emulator_bad_iopl(ctxt))
+ kvm_inject_gp(ctxt->vcpu, 0);
+ else {
+ toggle_interruptibility(ctxt, X86_SHADOW_INT_STI);
+ ctxt->eflags |= X86_EFLAGS_IF;
+ c->dst.type = OP_NONE; /* Disable writeback. */
+ }
break;
case 0xfc: /* cld */
ctxt->eflags &= ~EFLG_DF;
}
break;
case 0x05: /* syscall */
- if (emulate_syscall(ctxt) == -1)
- goto cannot_emulate;
+ rc = emulate_syscall(ctxt);
+ if (rc != X86EMUL_CONTINUE)
+ goto done;
else
goto writeback;
break;
c->dst.type = OP_NONE;
break;
case 0x34: /* sysenter */
- if (emulate_sysenter(ctxt) == -1)
- goto cannot_emulate;
+ rc = emulate_sysenter(ctxt);
+ if (rc != X86EMUL_CONTINUE)
+ goto done;
else
goto writeback;
break;
case 0x35: /* sysexit */
- if (emulate_sysexit(ctxt) == -1)
- goto cannot_emulate;
+ rc = emulate_sysexit(ctxt);
+ if (rc != X86EMUL_CONTINUE)
+ goto done;
else
goto writeback;
break;
{
struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state,
irq_ack_notifier);
- spin_lock(&ps->inject_lock);
+ raw_spin_lock(&ps->inject_lock);
if (atomic_dec_return(&ps->pit_timer.pending) < 0)
atomic_inc(&ps->pit_timer.pending);
ps->irq_ack = 1;
- spin_unlock(&ps->inject_lock);
+ raw_spin_unlock(&ps->inject_lock);
}
void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
.write = speaker_ioport_write,
};
-/* Caller must have writers lock on slots_lock */
+/* Caller must hold slots_lock */
struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
{
struct kvm_pit *pit;
mutex_init(&pit->pit_state.lock);
mutex_lock(&pit->pit_state.lock);
- spin_lock_init(&pit->pit_state.inject_lock);
+ raw_spin_lock_init(&pit->pit_state.inject_lock);
kvm->arch.vpit = pit;
pit->kvm = kvm;
kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
kvm_iodevice_init(&pit->dev, &pit_dev_ops);
- ret = __kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev);
+ ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &pit->dev);
if (ret < 0)
goto fail;
if (flags & KVM_PIT_SPEAKER_DUMMY) {
kvm_iodevice_init(&pit->speaker_dev, &speaker_dev_ops);
- ret = __kvm_io_bus_register_dev(&kvm->pio_bus,
+ ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS,
&pit->speaker_dev);
if (ret < 0)
goto fail_unregister;
return pit;
fail_unregister:
- __kvm_io_bus_unregister_dev(&kvm->pio_bus, &pit->dev);
+ kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->dev);
fail:
- if (pit->irq_source_id >= 0)
- kvm_free_irq_source_id(kvm, pit->irq_source_id);
+ kvm_unregister_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
+ kvm_unregister_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier);
+ kvm_free_irq_source_id(kvm, pit->irq_source_id);
kfree(pit);
return NULL;
/* Try to inject pending interrupts when
* last one has been acked.
*/
- spin_lock(&ps->inject_lock);
+ raw_spin_lock(&ps->inject_lock);
if (atomic_read(&ps->pit_timer.pending) && ps->irq_ack) {
ps->irq_ack = 0;
inject = 1;
}
- spin_unlock(&ps->inject_lock);
+ raw_spin_unlock(&ps->inject_lock);
if (inject)
__inject_pit_timer_intr(kvm);
}
u32 speaker_data_on;
struct mutex lock;
struct kvm_pit *pit;
- spinlock_t inject_lock;
+ raw_spinlock_t inject_lock;
unsigned long irq_ack;
struct kvm_irq_ack_notifier irq_ack_notifier;
};
* Other interrupt may be delivered to PIC while lock is dropped but
* it should be safe since PIC state is already updated at this stage.
*/
- spin_unlock(&s->pics_state->lock);
+ raw_spin_unlock(&s->pics_state->lock);
kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq);
- spin_lock(&s->pics_state->lock);
+ raw_spin_lock(&s->pics_state->lock);
}
void kvm_pic_clear_isr_ack(struct kvm *kvm)
{
struct kvm_pic *s = pic_irqchip(kvm);
- spin_lock(&s->lock);
+
+ raw_spin_lock(&s->lock);
s->pics[0].isr_ack = 0xff;
s->pics[1].isr_ack = 0xff;
- spin_unlock(&s->lock);
+ raw_spin_unlock(&s->lock);
}
/*
void kvm_pic_update_irq(struct kvm_pic *s)
{
- spin_lock(&s->lock);
+ raw_spin_lock(&s->lock);
pic_update_irq(s);
- spin_unlock(&s->lock);
+ raw_spin_unlock(&s->lock);
}
int kvm_pic_set_irq(void *opaque, int irq, int level)
struct kvm_pic *s = opaque;
int ret = -1;
- spin_lock(&s->lock);
+ raw_spin_lock(&s->lock);
if (irq >= 0 && irq < PIC_NUM_PINS) {
ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
pic_update_irq(s);
trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr,
s->pics[irq >> 3].imr, ret == 0);
}
- spin_unlock(&s->lock);
+ raw_spin_unlock(&s->lock);
return ret;
}
int irq, irq2, intno;
struct kvm_pic *s = pic_irqchip(kvm);
- spin_lock(&s->lock);
+ raw_spin_lock(&s->lock);
irq = pic_get_irq(&s->pics[0]);
if (irq >= 0) {
pic_intack(&s->pics[0], irq);
intno = s->pics[0].irq_base + irq;
}
pic_update_irq(s);
- spin_unlock(&s->lock);
+ raw_spin_unlock(&s->lock);
return intno;
}
printk(KERN_ERR "PIC: non byte write\n");
return 0;
}
- spin_lock(&s->lock);
+ raw_spin_lock(&s->lock);
switch (addr) {
case 0x20:
case 0x21:
elcr_ioport_write(&s->pics[addr & 1], addr, data);
break;
}
- spin_unlock(&s->lock);
+ raw_spin_unlock(&s->lock);
return 0;
}
printk(KERN_ERR "PIC: non byte read\n");
return 0;
}
- spin_lock(&s->lock);
+ raw_spin_lock(&s->lock);
switch (addr) {
case 0x20:
case 0x21:
break;
}
*(unsigned char *)val = data;
- spin_unlock(&s->lock);
+ raw_spin_unlock(&s->lock);
return 0;
}
s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL);
if (!s)
return NULL;
- spin_lock_init(&s->lock);
+ raw_spin_lock_init(&s->lock);
s->kvm = kvm;
s->pics[0].elcr_mask = 0xf8;
s->pics[1].elcr_mask = 0xde;
* Initialize PIO device
*/
kvm_iodevice_init(&s->dev, &picdev_ops);
- ret = kvm_io_bus_register_dev(kvm, &kvm->pio_bus, &s->dev);
+ mutex_lock(&kvm->slots_lock);
+ ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &s->dev);
+ mutex_unlock(&kvm->slots_lock);
if (ret < 0) {
kfree(s);
return NULL;
return s;
}
+
+void kvm_destroy_pic(struct kvm *kvm)
+{
+ struct kvm_pic *vpic = kvm->arch.vpic;
+
+ if (vpic) {
+ kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev);
+ kvm->arch.vpic = NULL;
+ kfree(vpic);
+ }
+}
};
struct kvm_pic {
- spinlock_t lock;
+ raw_spinlock_t lock;
unsigned pending_acks;
struct kvm *kvm;
struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */
};
struct kvm_pic *kvm_create_pic(struct kvm *kvm);
+void kvm_destroy_pic(struct kvm *kvm);
int kvm_pic_read_irq(struct kvm *kvm);
void kvm_pic_update_irq(struct kvm_pic *s);
void kvm_pic_clear_isr_ack(struct kvm *kvm);
#ifndef ASM_KVM_CACHE_REGS_H
#define ASM_KVM_CACHE_REGS_H
+#define KVM_POSSIBLE_CR0_GUEST_BITS X86_CR0_TS
+#define KVM_POSSIBLE_CR4_GUEST_BITS \
+ (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
+ | X86_CR4_OSXMMEXCPT | X86_CR4_PGE)
+
static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu,
enum kvm_reg reg)
{
return vcpu->arch.pdptrs[index];
}
+static inline ulong kvm_read_cr0_bits(struct kvm_vcpu *vcpu, ulong mask)
+{
+ ulong tmask = mask & KVM_POSSIBLE_CR0_GUEST_BITS;
+ if (tmask & vcpu->arch.cr0_guest_owned_bits)
+ kvm_x86_ops->decache_cr0_guest_bits(vcpu);
+ return vcpu->arch.cr0 & mask;
+}
+
+static inline ulong kvm_read_cr0(struct kvm_vcpu *vcpu)
+{
+ return kvm_read_cr0_bits(vcpu, ~0UL);
+}
+
+static inline ulong kvm_read_cr4_bits(struct kvm_vcpu *vcpu, ulong mask)
+{
+ ulong tmask = mask & KVM_POSSIBLE_CR4_GUEST_BITS;
+ if (tmask & vcpu->arch.cr4_guest_owned_bits)
+ kvm_x86_ops->decache_cr4_guest_bits(vcpu);
+ return vcpu->arch.cr4 & mask;
+}
+
+static inline ulong kvm_read_cr4(struct kvm_vcpu *vcpu)
+{
+ return kvm_read_cr4_bits(vcpu, ~0UL);
+}
+
#endif
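kvm_read_cr0_bits()/kvm_read_cr4_bits() above only call back into the vendor code when a requested bit may be guest-owned and therefore stale in the software copy. A stripped-down sketch of that lazy-decache pattern, with the hardware read stubbed out (illustrative only; the names below are invented for the example):

#include <stdio.h>

#define GUEST_OWNED_BITS 0x8UL		/* e.g. only CR0.TS may live in hardware */

struct cached_reg {
	unsigned long value;		/* software copy of the register */
	unsigned long guest_owned;	/* bits the software copy may not track */
};

static unsigned long read_from_hardware(void)
{
	return 0x8;			/* stand-in for the VMCS/VMCB field read */
}

static unsigned long read_bits(struct cached_reg *r, unsigned long mask)
{
	/* refresh the cached value only if a requested bit may be stale */
	if (mask & r->guest_owned & GUEST_OWNED_BITS) {
		r->value = (r->value & ~GUEST_OWNED_BITS) | read_from_hardware();
		r->guest_owned = 0;
	}
	return r->value & mask;
}

int main(void)
{
	struct cached_reg cr0 = { .value = 0x1, .guest_owned = GUEST_OWNED_BITS };

	printf("cr0 = %#lx\n", read_bits(&cr0, ~0UL));	/* prints 0x9 */
	return 0;
}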
return 0;
}
+
+int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data)
+{
+ struct kvm_lapic *apic = vcpu->arch.apic;
+
+ if (!irqchip_in_kernel(vcpu->kvm))
+ return 1;
+
+ /* if this is ICR write vector before command */
+ if (reg == APIC_ICR)
+ apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
+ return apic_reg_write(apic, reg, (u32)data);
+}
+
+int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
+{
+ struct kvm_lapic *apic = vcpu->arch.apic;
+ u32 low, high = 0;
+
+ if (!irqchip_in_kernel(vcpu->kvm))
+ return 1;
+
+ if (apic_reg_read(apic, reg, 4, &low))
+ return 1;
+ if (reg == APIC_ICR)
+ apic_reg_read(apic, APIC_ICR2, 4, &high);
+
+ *data = (((u64)high) << 32) | low;
+
+ return 0;
+}
int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data);
int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
+
+int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data);
+int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
+
+static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE;
+}
#endif
*/
#include "mmu.h"
+#include "x86.h"
#include "kvm_cache_regs.h"
#include <linux/kvm_host.h>
#include <linux/swap.h>
#include <linux/hugetlb.h>
#include <linux/compiler.h>
+#include <linux/srcu.h>
#include <asm/page.h>
#include <asm/cmpxchg.h>
#define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK \
| PT64_NX_MASK)
-#define PFERR_PRESENT_MASK (1U << 0)
-#define PFERR_WRITE_MASK (1U << 1)
-#define PFERR_USER_MASK (1U << 2)
-#define PFERR_RSVD_MASK (1U << 3)
-#define PFERR_FETCH_MASK (1U << 4)
-
-#define PT_PDPE_LEVEL 3
-#define PT_DIRECTORY_LEVEL 2
-#define PT_PAGE_TABLE_LEVEL 1
-
#define RMAP_EXT 4
#define ACC_EXEC_MASK 1
#define ACC_USER_MASK PT_USER_MASK
#define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)
+#include <trace/events/kvm.h>
+
+#undef TRACE_INCLUDE_FILE
#define CREATE_TRACE_POINTS
#include "mmutrace.h"
static int is_write_protection(struct kvm_vcpu *vcpu)
{
- return vcpu->arch.cr0 & X86_CR0_WP;
+ return kvm_read_cr0_bits(vcpu, X86_CR0_WP);
}
static int is_cpuid_PSE36(void)
static int is_nx(struct kvm_vcpu *vcpu)
{
- return vcpu->arch.shadow_efer & EFER_NX;
+ return vcpu->arch.efer & EFER_NX;
}
static int is_shadow_present_pte(u64 pte)
return pte & PT_PAGE_SIZE_MASK;
}
-static int is_writeble_pte(unsigned long pte)
+static int is_writable_pte(unsigned long pte)
{
return pte & PT_WRITABLE_MASK;
}
static int host_mapping_level(struct kvm *kvm, gfn_t gfn)
{
- unsigned long page_size = PAGE_SIZE;
- struct vm_area_struct *vma;
- unsigned long addr;
+ unsigned long page_size;
int i, ret = 0;
- addr = gfn_to_hva(kvm, gfn);
- if (kvm_is_error_hva(addr))
- return PT_PAGE_TABLE_LEVEL;
-
- down_read(&current->mm->mmap_sem);
- vma = find_vma(current->mm, addr);
- if (!vma)
- goto out;
-
- page_size = vma_kernel_pagesize(vma);
-
-out:
- up_read(&current->mm->mmap_sem);
+ page_size = kvm_host_page_size(kvm, gfn);
for (i = PT_PAGE_TABLE_LEVEL;
i < (PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES); ++i) {
static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn)
{
struct kvm_memory_slot *slot;
- int host_level;
- int level = PT_PAGE_TABLE_LEVEL;
+ int host_level, level, max_level;
slot = gfn_to_memslot(vcpu->kvm, large_gfn);
if (slot && slot->dirty_bitmap)
if (host_level == PT_PAGE_TABLE_LEVEL)
return host_level;
- for (level = PT_DIRECTORY_LEVEL; level <= host_level; ++level)
+ max_level = kvm_x86_ops->get_lpage_level() < host_level ?
+ kvm_x86_ops->get_lpage_level() : host_level;
+
+ for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level)
if (has_wrprotected_page(vcpu->kvm, large_gfn, level))
break;
pfn = spte_to_pfn(*spte);
if (*spte & shadow_accessed_mask)
kvm_set_pfn_accessed(pfn);
- if (is_writeble_pte(*spte))
+ if (is_writable_pte(*spte))
kvm_set_pfn_dirty(pfn);
rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level);
if (!*rmapp) {
prev_desc = desc;
desc = desc->more;
}
+ pr_err("rmap_remove: %p %llx many->many\n", spte, *spte);
BUG();
}
}
BUG_ON(!spte);
BUG_ON(!(*spte & PT_PRESENT_MASK));
rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
- if (is_writeble_pte(*spte)) {
+ if (is_writable_pte(*spte)) {
__set_spte(spte, *spte & ~PT_WRITABLE_MASK);
write_protected = 1;
}
BUG_ON(!(*spte & PT_PRESENT_MASK));
BUG_ON((*spte & (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)) != (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK));
pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn);
- if (is_writeble_pte(*spte)) {
+ if (is_writable_pte(*spte)) {
rmap_remove(kvm, spte);
--kvm->stat.lpages;
__set_spte(spte, shadow_trap_nonpresent_pte);
new_spte &= ~PT_WRITABLE_MASK;
new_spte &= ~SPTE_HOST_WRITEABLE;
- if (is_writeble_pte(*spte))
+ if (is_writable_pte(*spte))
kvm_set_pfn_dirty(spte_to_pfn(*spte));
__set_spte(spte, new_spte);
spte = rmap_next(kvm, rmapp, spte);
unsigned long data))
{
int i, j;
+ int ret;
int retval = 0;
+ struct kvm_memslots *slots;
- /*
- * If mmap_sem isn't taken, we can look the memslots with only
- * the mmu_lock by skipping over the slots with userspace_addr == 0.
- */
- for (i = 0; i < kvm->nmemslots; i++) {
- struct kvm_memory_slot *memslot = &kvm->memslots[i];
+ slots = rcu_dereference(kvm->memslots);
+
+ for (i = 0; i < slots->nmemslots; i++) {
+ struct kvm_memory_slot *memslot = &slots->memslots[i];
unsigned long start = memslot->userspace_addr;
unsigned long end;
- /* mmu_lock protects userspace_addr */
- if (!start)
- continue;
-
end = start + (memslot->npages << PAGE_SHIFT);
if (hva >= start && hva < end) {
gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
- retval |= handler(kvm, &memslot->rmap[gfn_offset],
- data);
+ ret = handler(kvm, &memslot->rmap[gfn_offset], data);
for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) {
int idx = gfn_offset;
idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j);
- retval |= handler(kvm,
+ ret |= handler(kvm,
&memslot->lpage_info[j][idx].rmap_pde,
data);
}
+ trace_kvm_age_page(hva, memslot, ret);
+ retval |= ret;
}
}
u64 *spte;
int young = 0;
- /* always return old for EPT */
+ /*
+ * Emulate the accessed bit for EPT, by checking if this page has
+ * an EPT mapping, and clearing it if it does. On the next access,
+ * a new EPT mapping will be established.
+ * This has some overhead, but not as much as the cost of swapping
+ * out actively used pages or breaking up actively used hugepages.
+ */
if (!shadow_accessed_mask)
- return 0;
+ return kvm_unmap_rmapp(kvm, rmapp, data);
spte = rmap_next(kvm, rmapp, NULL);
while (spte) {
static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn)
{
- int slot = memslot_id(kvm, gfn_to_memslot(kvm, gfn));
+ int slot = memslot_id(kvm, gfn);
struct kvm_mmu_page *sp = page_header(__pa(pte));
__set_bit(slot, sp->slot_bitmap);
{
struct page *page;
- gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
+ gpa_t gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL);
if (gpa == UNMAPPED_GVA)
return NULL;
* is responsibility of mmu_get_page / kvm_sync_page.
* Same reasoning can be applied to dirty page accounting.
*/
- if (!can_unsync && is_writeble_pte(*sptep))
+ if (!can_unsync && is_writable_pte(*sptep))
goto set_pte;
if (mmu_need_write_protect(vcpu, gfn, can_unsync)) {
__func__, gfn);
ret = 1;
pte_access &= ~ACC_WRITE_MASK;
- if (is_writeble_pte(spte))
+ if (is_writable_pte(spte))
spte &= ~PT_WRITABLE_MASK;
}
}
bool reset_host_protection)
{
int was_rmapped = 0;
- int was_writeble = is_writeble_pte(*sptep);
+ int was_writable = is_writable_pte(*sptep);
int rmap_count;
pgprintk("%s: spte %llx access %x write_fault %d"
if (rmap_count > RMAP_RECYCLE_THRESHOLD)
rmap_recycle(vcpu, sptep, gfn);
} else {
- if (was_writeble)
+ if (was_writable)
kvm_release_pfn_dirty(pfn);
else
kvm_release_pfn_clean(pfn);
spin_unlock(&vcpu->kvm->mmu_lock);
}
-static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr)
+static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr,
+ u32 access, u32 *error)
{
+ if (error)
+ *error = 0;
return vaddr;
}
if (tdp_enabled)
return 0;
- gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
+ gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL);
spin_lock(&vcpu->kvm->mmu_lock);
r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
*/
page = alloc_page(GFP_KERNEL | __GFP_DMA32);
if (!page)
- goto error_1;
+ return -ENOMEM;
+
vcpu->arch.mmu.pae_root = page_address(page);
for (i = 0; i < 4; ++i)
vcpu->arch.mmu.pae_root[i] = INVALID_PAGE;
return 0;
-
-error_1:
- free_mmu_pages(vcpu);
- return -ENOMEM;
}
int kvm_mmu_create(struct kvm_vcpu *vcpu)
spin_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
- int npages;
+ int npages, idx;
- if (!down_read_trylock(&kvm->slots_lock))
- continue;
+ idx = srcu_read_lock(&kvm->srcu);
spin_lock(&kvm->mmu_lock);
npages = kvm->arch.n_alloc_mmu_pages -
kvm->arch.n_free_mmu_pages;
nr_to_scan--;
spin_unlock(&kvm->mmu_lock);
- up_read(&kvm->slots_lock);
+ srcu_read_unlock(&kvm->srcu, idx);
}
if (kvm_freed)
list_move_tail(&kvm_freed->vm_list, &vm_list);
int i;
unsigned int nr_mmu_pages;
unsigned int nr_pages = 0;
+ struct kvm_memslots *slots;
- for (i = 0; i < kvm->nmemslots; i++)
- nr_pages += kvm->memslots[i].npages;
+ slots = rcu_dereference(kvm->memslots);
+ for (i = 0; i < slots->nmemslots; i++)
+ nr_pages += slots->memslots[i].npages;
nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000;
nr_mmu_pages = max(nr_mmu_pages,
if (is_shadow_present_pte(ent) && !is_last_spte(ent, level))
audit_mappings_page(vcpu, ent, va, level - 1);
else {
- gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, va);
+ gpa_t gpa = kvm_mmu_gva_to_gpa_read(vcpu, va, NULL);
gfn_t gfn = gpa >> PAGE_SHIFT;
pfn_t pfn = gfn_to_pfn(vcpu->kvm, gfn);
hpa_t hpa = (hpa_t)pfn << PAGE_SHIFT;
static int count_rmaps(struct kvm_vcpu *vcpu)
{
int nmaps = 0;
- int i, j, k;
+ int i, j, k, idx;
+ idx = srcu_read_lock(&kvm->srcu);
+ slots = rcu_dereference(kvm->memslots);
for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
- struct kvm_memory_slot *m = &vcpu->kvm->memslots[i];
+ struct kvm_memory_slot *m = &slots->memslots[i];
struct kvm_rmap_desc *d;
for (j = 0; j < m->npages; ++j) {
}
}
}
+ srcu_read_unlock(&kvm->srcu, idx);
return nmaps;
}
#define __KVM_X86_MMU_H
#include <linux/kvm_host.h>
+#include "kvm_cache_regs.h"
#define PT64_PT_BITS 9
#define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS)
#define PT32_ROOT_LEVEL 2
#define PT32E_ROOT_LEVEL 3
+#define PT_PDPE_LEVEL 3
+#define PT_DIRECTORY_LEVEL 2
+#define PT_PAGE_TABLE_LEVEL 1
+
+#define PFERR_PRESENT_MASK (1U << 0)
+#define PFERR_WRITE_MASK (1U << 1)
+#define PFERR_USER_MASK (1U << 2)
+#define PFERR_RSVD_MASK (1U << 3)
+#define PFERR_FETCH_MASK (1U << 4)
+
int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]);
static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
return kvm_mmu_load(vcpu);
}
-static inline int is_long_mode(struct kvm_vcpu *vcpu)
-{
-#ifdef CONFIG_X86_64
- return vcpu->arch.shadow_efer & EFER_LMA;
-#else
- return 0;
-#endif
-}
-
-static inline int is_pae(struct kvm_vcpu *vcpu)
-{
- return vcpu->arch.cr4 & X86_CR4_PAE;
-}
-
-static inline int is_pse(struct kvm_vcpu *vcpu)
-{
- return vcpu->arch.cr4 & X86_CR4_PSE;
-}
-
-static inline int is_paging(struct kvm_vcpu *vcpu)
-{
- return vcpu->arch.cr0 & X86_CR0_PG;
-}
-
static inline int is_present_gpte(unsigned long pte)
{
return pte & PT_PRESENT_MASK;
if (rsvd_fault)
goto access_error;
- if (write_fault && !is_writeble_pte(pte))
+ if (write_fault && !is_writable_pte(pte))
if (user_fault || is_write_protection(vcpu))
goto access_error;
spin_unlock(&vcpu->kvm->mmu_lock);
}
-static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
+static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access,
+ u32 *error)
{
struct guest_walker walker;
gpa_t gpa = UNMAPPED_GVA;
int r;
- r = FNAME(walk_addr)(&walker, vcpu, vaddr, 0, 0, 0);
+ r = FNAME(walk_addr)(&walker, vcpu, vaddr,
+ !!(access & PFERR_WRITE_MASK),
+ !!(access & PFERR_USER_MASK),
+ !!(access & PFERR_FETCH_MASK));
if (r) {
gpa = gfn_to_gpa(walker.gfn);
gpa |= vaddr & ~PAGE_MASK;
- }
+ } else if (error)
+ *error = walker.error_code;
return gpa;
}
efer &= ~EFER_LME;
to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
- vcpu->arch.shadow_efer = efer;
+ vcpu->arch.efer = efer;
}
static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
struct vmcb_control_area *control = &svm->vmcb->control;
struct vmcb_save_area *save = &svm->vmcb->save;
+ svm->vcpu.fpu_active = 1;
+
control->intercept_cr_read = INTERCEPT_CR0_MASK |
INTERCEPT_CR3_MASK |
INTERCEPT_CR4_MASK;
control->intercept_dr_read = INTERCEPT_DR0_MASK |
INTERCEPT_DR1_MASK |
INTERCEPT_DR2_MASK |
- INTERCEPT_DR3_MASK;
+ INTERCEPT_DR3_MASK |
+ INTERCEPT_DR4_MASK |
+ INTERCEPT_DR5_MASK |
+ INTERCEPT_DR6_MASK |
+ INTERCEPT_DR7_MASK;
control->intercept_dr_write = INTERCEPT_DR0_MASK |
INTERCEPT_DR1_MASK |
INTERCEPT_DR2_MASK |
INTERCEPT_DR3_MASK |
+ INTERCEPT_DR4_MASK |
INTERCEPT_DR5_MASK |
+ INTERCEPT_DR6_MASK |
INTERCEPT_DR7_MASK;
control->intercept_exceptions = (1 << PF_VECTOR) |
control->intercept = (1ULL << INTERCEPT_INTR) |
(1ULL << INTERCEPT_NMI) |
(1ULL << INTERCEPT_SMI) |
+ (1ULL << INTERCEPT_SELECTIVE_CR0) |
(1ULL << INTERCEPT_CPUID) |
(1ULL << INTERCEPT_INVD) |
(1ULL << INTERCEPT_HLT) |
control->intercept &= ~((1ULL << INTERCEPT_TASK_SWITCH) |
(1ULL << INTERCEPT_INVLPG));
control->intercept_exceptions &= ~(1 << PF_VECTOR);
- control->intercept_cr_read &= ~(INTERCEPT_CR0_MASK|
- INTERCEPT_CR3_MASK);
- control->intercept_cr_write &= ~(INTERCEPT_CR0_MASK|
- INTERCEPT_CR3_MASK);
+ control->intercept_cr_read &= ~INTERCEPT_CR3_MASK;
+ control->intercept_cr_write &= ~INTERCEPT_CR3_MASK;
save->g_pat = 0x0007040600070406ULL;
save->cr3 = 0;
save->cr4 = 0;
init_vmcb(svm);
fx_init(&svm->vcpu);
- svm->vcpu.fpu_active = 1;
svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
if (kvm_vcpu_is_bsp(&svm->vcpu))
svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
if (unlikely(cpu != vcpu->cpu)) {
u64 delta;
- /*
- * Make sure that the guest sees a monotonically
- * increasing TSC.
- */
- delta = vcpu->arch.host_tsc - native_read_tsc();
- svm->vmcb->control.tsc_offset += delta;
- if (is_nested(svm))
- svm->nested.hsave->control.tsc_offset += delta;
+ if (check_tsc_unstable()) {
+ /*
+ * Make sure that the guest sees a monotonically
+ * increasing TSC.
+ */
+ delta = vcpu->arch.host_tsc - native_read_tsc();
+ svm->vmcb->control.tsc_offset += delta;
+ if (is_nested(svm))
+ svm->nested.hsave->control.tsc_offset += delta;
+ }
vcpu->cpu = cpu;
kvm_migrate_timers(vcpu);
svm->asid_generation = 0;
svm->vmcb->save.gdtr.base = dt->base ;
}
+static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
+{
+}
+
static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
{
}
+static void update_cr0_intercept(struct vcpu_svm *svm)
+{
+ ulong gcr0 = svm->vcpu.arch.cr0;
+ u64 *hcr0 = &svm->vmcb->save.cr0;
+
+ if (!svm->vcpu.fpu_active)
+ *hcr0 |= SVM_CR0_SELECTIVE_MASK;
+ else
+ *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
+ | (gcr0 & SVM_CR0_SELECTIVE_MASK);
+
+
+ if (gcr0 == *hcr0 && svm->vcpu.fpu_active) {
+ svm->vmcb->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK;
+ svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK;
+ } else {
+ svm->vmcb->control.intercept_cr_read |= INTERCEPT_CR0_MASK;
+ svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR0_MASK;
+ }
+}
+
static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
struct vcpu_svm *svm = to_svm(vcpu);
#ifdef CONFIG_X86_64
- if (vcpu->arch.shadow_efer & EFER_LME) {
+ if (vcpu->arch.efer & EFER_LME) {
if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
- vcpu->arch.shadow_efer |= EFER_LMA;
+ vcpu->arch.efer |= EFER_LMA;
svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
}
if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) {
- vcpu->arch.shadow_efer &= ~EFER_LMA;
+ vcpu->arch.efer &= ~EFER_LMA;
svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME);
}
}
#endif
- if (npt_enabled)
- goto set;
+ vcpu->arch.cr0 = cr0;
- if ((vcpu->arch.cr0 & X86_CR0_TS) && !(cr0 & X86_CR0_TS)) {
- svm->vmcb-