KVM: SVM: check for progress after IRET interception
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index cf47a2fc24e917a528a21fd1cdcbf387ee4e912c..8d61df4a02c79af936d98d93477f7a65ca15200a 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -51,6 +51,10 @@ MODULE_LICENSE("GPL");
 #define SVM_FEATURE_LBRV           (1 <<  1)
 #define SVM_FEATURE_SVML           (1 <<  2)
 #define SVM_FEATURE_NRIP           (1 <<  3)
+#define SVM_FEATURE_TSC_RATE       (1 <<  4)
+#define SVM_FEATURE_VMCB_CLEAN     (1 <<  5)
+#define SVM_FEATURE_FLUSH_ASID     (1 <<  6)
+#define SVM_FEATURE_DECODE_ASSIST  (1 <<  7)
 #define SVM_FEATURE_PAUSE_FILTER   (1 << 10)
 
 #define NESTED_EXIT_HOST       0       /* Exit handled on host level */
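
The four new feature bits mirror CPUID Fn8000_000Ah EDX, where the CPU advertises optional SVM capabilities (TSC ratio scaling, VMCB clean bits, flush-by-ASID, decode assists). As a hedged illustration only, a minimal userspace probe of those bit positions could look like this (assumes GCC/Clang's <cpuid.h>):

    /* Sketch: probe the SVM feature leaf from userspace. */
    #include <cpuid.h>
    #include <stdio.h>

    int main(void)
    {
            unsigned int eax, ebx, ecx, edx;

            if (!__get_cpuid(0x8000000a, &eax, &ebx, &ecx, &edx))
                    return 1;               /* no SVM leaf on this CPU */

            printf("TSC_RATE      %d\n", !!(edx & (1 << 4)));
            printf("VMCB_CLEAN    %d\n", !!(edx & (1 << 5)));
            printf("FLUSH_ASID    %d\n", !!(edx & (1 << 6)));
            printf("DECODE_ASSIST %d\n", !!(edx & (1 << 7)));
            return 0;
    }
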
@@ -98,10 +102,8 @@ struct nested_state {
        unsigned long vmexit_rax;
 
        /* cache for intercepts of the guest */
-       u16 intercept_cr_read;
-       u16 intercept_cr_write;
-       u16 intercept_dr_read;
-       u16 intercept_dr_write;
+       u32 intercept_cr;
+       u32 intercept_dr;
        u32 intercept_exceptions;
        u64 intercept;
 
@@ -133,6 +135,8 @@ struct vcpu_svm {
 
        u32 *msrpm;
 
+       ulong nmi_iret_rip;
+
        struct nested_state nested;
 
        bool nmi_singlestep;
@@ -187,14 +191,151 @@ static int nested_svm_vmexit(struct vcpu_svm *svm);
 static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
                                      bool has_error_code, u32 error_code);
 
+enum {
+       VMCB_INTERCEPTS, /* Intercept vectors, TSC offset,
+                           pause filter count */
+       VMCB_PERM_MAP,   /* IOPM Base and MSRPM Base */
+       VMCB_ASID,       /* ASID */
+       VMCB_INTR,       /* int_ctl, int_vector */
+       VMCB_NPT,        /* npt_en, nCR3, gPAT */
+       VMCB_CR,         /* CR0, CR3, CR4, EFER */
+       VMCB_DR,         /* DR6, DR7 */
+       VMCB_DT,         /* GDT, IDT */
+       VMCB_SEG,        /* CS, DS, SS, ES, CPL */
+       VMCB_CR2,        /* CR2 only */
+       VMCB_LBR,        /* DBGCTL, BR_FROM, BR_TO, LAST_EX_FROM, LAST_EX_TO */
+       VMCB_DIRTY_MAX,
+};
+
+/* TPR and CR2 are always written before VMRUN */
+#define VMCB_ALWAYS_DIRTY_MASK ((1U << VMCB_INTR) | (1U << VMCB_CR2))
+
+static inline void mark_all_dirty(struct vmcb *vmcb)
+{
+       vmcb->control.clean = 0;
+}
+
+static inline void mark_all_clean(struct vmcb *vmcb)
+{
+       vmcb->control.clean = ((1 << VMCB_DIRTY_MAX) - 1)
+                              & ~VMCB_ALWAYS_DIRTY_MASK;
+}
+
+static inline void mark_dirty(struct vmcb *vmcb, int bit)
+{
+       vmcb->control.clean &= ~(1 << bit);
+}
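
With VMCB clean bits the CPU may cache VMCB state across VMRUNs: a set bit tells the hardware that the corresponding field group was not modified by software, so KVM must clear a group's bit whenever it writes one of its fields. A self-contained model of that lifecycle (indices mirror the enum above):

    #include <assert.h>
    #include <stdint.h>

    enum { INTERCEPTS, PERM_MAP, ASID, INTR, NPT, CR, DR, DT, SEG,
           CR2, LBR, DIRTY_MAX };
    #define ALWAYS_DIRTY ((1u << INTR) | (1u << CR2))

    int main(void)
    {
            uint32_t clean = 0;                     /* fresh VMCB: all dirty */

            /* after VMRUN: everything clean except INTR and CR2 */
            clean = ((1u << DIRTY_MAX) - 1) & ~ALWAYS_DIRTY;

            clean &= ~(1u << CR);                   /* wrote save.cr0 */
            assert(!(clean & (1u << CR)));          /* CPU must reload CRs */
            assert(clean & (1u << ASID));           /* ASID still cached */
            return 0;
    }
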
+
 static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
 {
        return container_of(vcpu, struct vcpu_svm, vcpu);
 }
 
-static inline bool is_nested(struct vcpu_svm *svm)
+static void recalc_intercepts(struct vcpu_svm *svm)
+{
+       struct vmcb_control_area *c, *h;
+       struct nested_state *g;
+
+       mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
+
+       if (!is_guest_mode(&svm->vcpu))
+               return;
+
+       c = &svm->vmcb->control;
+       h = &svm->nested.hsave->control;
+       g = &svm->nested;
+
+       c->intercept_cr = h->intercept_cr | g->intercept_cr;
+       c->intercept_dr = h->intercept_dr | g->intercept_dr;
+       c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions;
+       c->intercept = h->intercept | g->intercept;
+}
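
While in guest mode the active VMCB has to trap everything that either the L1 host state or the nested guest requested, so the cached vectors are simply ORed; the set_*/clr_* helpers below always write the host copy via get_host_vmcb() and then redo this merge. A small model of the OR-merge invariant, with invented sample values:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
            uint32_t host  = (1u << 0) | (1u << 16);   /* CR0 read+write */
            uint32_t guest = (1u << 8);                /* CR8 read */
            uint32_t eff   = host | guest;

            assert((eff & host) == host);   /* host intercepts survive */
            assert(eff & guest);            /* guest's are added on top */
            return 0;
    }
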
+
+static inline struct vmcb *get_host_vmcb(struct vcpu_svm *svm)
+{
+       if (is_guest_mode(&svm->vcpu))
+               return svm->nested.hsave;
+       else
+               return svm->vmcb;
+}
+
+static inline void set_cr_intercept(struct vcpu_svm *svm, int bit)
+{
+       struct vmcb *vmcb = get_host_vmcb(svm);
+
+       vmcb->control.intercept_cr |= (1U << bit);
+
+       recalc_intercepts(svm);
+}
+
+static inline void clr_cr_intercept(struct vcpu_svm *svm, int bit)
+{
+       struct vmcb *vmcb = get_host_vmcb(svm);
+
+       vmcb->control.intercept_cr &= ~(1U << bit);
+
+       recalc_intercepts(svm);
+}
+
+static inline bool is_cr_intercept(struct vcpu_svm *svm, int bit)
+{
+       struct vmcb *vmcb = get_host_vmcb(svm);
+
+       return vmcb->control.intercept_cr & (1U << bit);
+}
+
+static inline void set_dr_intercept(struct vcpu_svm *svm, int bit)
+{
+       struct vmcb *vmcb = get_host_vmcb(svm);
+
+       vmcb->control.intercept_dr |= (1U << bit);
+
+       recalc_intercepts(svm);
+}
+
+static inline void clr_dr_intercept(struct vcpu_svm *svm, int bit)
 {
-       return svm->nested.vmcb;
+       struct vmcb *vmcb = get_host_vmcb(svm);
+
+       vmcb->control.intercept_dr &= ~(1U << bit);
+
+       recalc_intercepts(svm);
+}
+
+static inline void set_exception_intercept(struct vcpu_svm *svm, int bit)
+{
+       struct vmcb *vmcb = get_host_vmcb(svm);
+
+       vmcb->control.intercept_exceptions |= (1U << bit);
+
+       recalc_intercepts(svm);
+}
+
+static inline void clr_exception_intercept(struct vcpu_svm *svm, int bit)
+{
+       struct vmcb *vmcb = get_host_vmcb(svm);
+
+       vmcb->control.intercept_exceptions &= ~(1U << bit);
+
+       recalc_intercepts(svm);
+}
+
+static inline void set_intercept(struct vcpu_svm *svm, int bit)
+{
+       struct vmcb *vmcb = get_host_vmcb(svm);
+
+       vmcb->control.intercept |= (1ULL << bit);
+
+       recalc_intercepts(svm);
+}
+
+static inline void clr_intercept(struct vcpu_svm *svm, int bit)
+{
+       struct vmcb *vmcb = get_host_vmcb(svm);
+
+       vmcb->control.intercept &= ~(1ULL << bit);
+
+       recalc_intercepts(svm);
 }
 
 static inline void enable_gif(struct vcpu_svm *svm)
@@ -271,11 +412,6 @@ static u32 svm_msrpm_offset(u32 msr)
 
 #define MAX_INST_SIZE 15
 
-static inline u32 svm_has(u32 feat)
-{
-       return svm_features & feat;
-}
-
 static inline void clgi(void)
 {
        asm volatile (__ex(SVM_CLGI));
@@ -291,16 +427,6 @@ static inline void invlpga(unsigned long addr, u32 asid)
        asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid));
 }
 
-static inline void force_new_asid(struct kvm_vcpu *vcpu)
-{
-       to_svm(vcpu)->asid_generation--;
-}
-
-static inline void flush_guest_tlb(struct kvm_vcpu *vcpu)
-{
-       force_new_asid(vcpu);
-}
-
 static int get_npt_level(void)
 {
 #ifdef CONFIG_X86_64
@@ -317,6 +443,7 @@ static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
                efer &= ~EFER_LME;
 
        to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
+       mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
 }
 
 static int is_external_interrupt(u32 info)
@@ -354,7 +481,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
                svm->next_rip = svm->vmcb->control.next_rip;
 
        if (!svm->next_rip) {
-               if (emulate_instruction(vcpu, 0, 0, EMULTYPE_SKIP) !=
+               if (emulate_instruction(vcpu, EMULTYPE_SKIP) !=
                                EMULATE_DONE)
                        printk(KERN_DEBUG "%s: NOP\n", __func__);
                return;
@@ -381,7 +508,7 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
            nested_svm_check_exception(svm, nr, has_error_code, error_code))
                return;
 
-       if (nr == BP_VECTOR && !svm_has(SVM_FEATURE_NRIP)) {
+       if (nr == BP_VECTOR && !static_cpu_has(X86_FEATURE_NRIPS)) {
                unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu);
 
                /*
@@ -677,7 +804,7 @@ static __init int svm_hardware_setup(void)
 
        svm_features = cpuid_edx(SVM_CPUID_FUNC);
 
-       if (!svm_has(SVM_FEATURE_NPT))
+       if (!boot_cpu_has(X86_FEATURE_NPT))
                npt_enabled = false;
 
        if (npt_enabled && !npt) {
@@ -732,13 +859,15 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
        struct vcpu_svm *svm = to_svm(vcpu);
        u64 g_tsc_offset = 0;
 
-       if (is_nested(svm)) {
+       if (is_guest_mode(vcpu)) {
                g_tsc_offset = svm->vmcb->control.tsc_offset -
                               svm->nested.hsave->control.tsc_offset;
                svm->nested.hsave->control.tsc_offset = offset;
        }
 
        svm->vmcb->control.tsc_offset = offset + g_tsc_offset;
+
+       mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
 }
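
hsave holds L1's TSC offset while the nested guest's VMCB carries L1's offset plus the L2 delta; the code above preserves that delta when the host rewrites the base offset. A sketch of the arithmetic with made-up numbers:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
            int64_t hsave = 1000;           /* L1's offset (in hsave) */
            int64_t vmcb  = 1500;           /* L2's offset (active VMCB) */
            int64_t delta = vmcb - hsave;   /* g_tsc_offset = 500 */

            int64_t offset = 2000;          /* host writes a new offset */
            hsave = offset;
            vmcb  = offset + delta;

            assert(vmcb - hsave == 500);    /* delta unchanged */
            return 0;
    }
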
 
 static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment)
@@ -746,8 +875,9 @@ static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment)
        struct vcpu_svm *svm = to_svm(vcpu);
 
        svm->vmcb->control.tsc_offset += adjustment;
-       if (is_nested(svm))
+       if (is_guest_mode(vcpu))
                svm->nested.hsave->control.tsc_offset += adjustment;
+       mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
 }
 
 static void init_vmcb(struct vcpu_svm *svm)
@@ -756,62 +886,62 @@ static void init_vmcb(struct vcpu_svm *svm)
        struct vmcb_save_area *save = &svm->vmcb->save;
 
        svm->vcpu.fpu_active = 1;
+       svm->vcpu.arch.hflags = 0;
 
-       control->intercept_cr_read =    INTERCEPT_CR0_MASK |
-                                       INTERCEPT_CR3_MASK |
-                                       INTERCEPT_CR4_MASK;
-
-       control->intercept_cr_write =   INTERCEPT_CR0_MASK |
-                                       INTERCEPT_CR3_MASK |
-                                       INTERCEPT_CR4_MASK |
-                                       INTERCEPT_CR8_MASK;
-
-       control->intercept_dr_read =    INTERCEPT_DR0_MASK |
-                                       INTERCEPT_DR1_MASK |
-                                       INTERCEPT_DR2_MASK |
-                                       INTERCEPT_DR3_MASK |
-                                       INTERCEPT_DR4_MASK |
-                                       INTERCEPT_DR5_MASK |
-                                       INTERCEPT_DR6_MASK |
-                                       INTERCEPT_DR7_MASK;
-
-       control->intercept_dr_write =   INTERCEPT_DR0_MASK |
-                                       INTERCEPT_DR1_MASK |
-                                       INTERCEPT_DR2_MASK |
-                                       INTERCEPT_DR3_MASK |
-                                       INTERCEPT_DR4_MASK |
-                                       INTERCEPT_DR5_MASK |
-                                       INTERCEPT_DR6_MASK |
-                                       INTERCEPT_DR7_MASK;
-
-       control->intercept_exceptions = (1 << PF_VECTOR) |
-                                       (1 << UD_VECTOR) |
-                                       (1 << MC_VECTOR);
-
-
-       control->intercept =    (1ULL << INTERCEPT_INTR) |
-                               (1ULL << INTERCEPT_NMI) |
-                               (1ULL << INTERCEPT_SMI) |
-                               (1ULL << INTERCEPT_SELECTIVE_CR0) |
-                               (1ULL << INTERCEPT_CPUID) |
-                               (1ULL << INTERCEPT_INVD) |
-                               (1ULL << INTERCEPT_HLT) |
-                               (1ULL << INTERCEPT_INVLPG) |
-                               (1ULL << INTERCEPT_INVLPGA) |
-                               (1ULL << INTERCEPT_IOIO_PROT) |
-                               (1ULL << INTERCEPT_MSR_PROT) |
-                               (1ULL << INTERCEPT_TASK_SWITCH) |
-                               (1ULL << INTERCEPT_SHUTDOWN) |
-                               (1ULL << INTERCEPT_VMRUN) |
-                               (1ULL << INTERCEPT_VMMCALL) |
-                               (1ULL << INTERCEPT_VMLOAD) |
-                               (1ULL << INTERCEPT_VMSAVE) |
-                               (1ULL << INTERCEPT_STGI) |
-                               (1ULL << INTERCEPT_CLGI) |
-                               (1ULL << INTERCEPT_SKINIT) |
-                               (1ULL << INTERCEPT_WBINVD) |
-                               (1ULL << INTERCEPT_MONITOR) |
-                               (1ULL << INTERCEPT_MWAIT);
+       set_cr_intercept(svm, INTERCEPT_CR0_READ);
+       set_cr_intercept(svm, INTERCEPT_CR3_READ);
+       set_cr_intercept(svm, INTERCEPT_CR4_READ);
+       set_cr_intercept(svm, INTERCEPT_CR0_WRITE);
+       set_cr_intercept(svm, INTERCEPT_CR3_WRITE);
+       set_cr_intercept(svm, INTERCEPT_CR4_WRITE);
+       set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
+
+       set_dr_intercept(svm, INTERCEPT_DR0_READ);
+       set_dr_intercept(svm, INTERCEPT_DR1_READ);
+       set_dr_intercept(svm, INTERCEPT_DR2_READ);
+       set_dr_intercept(svm, INTERCEPT_DR3_READ);
+       set_dr_intercept(svm, INTERCEPT_DR4_READ);
+       set_dr_intercept(svm, INTERCEPT_DR5_READ);
+       set_dr_intercept(svm, INTERCEPT_DR6_READ);
+       set_dr_intercept(svm, INTERCEPT_DR7_READ);
+
+       set_dr_intercept(svm, INTERCEPT_DR0_WRITE);
+       set_dr_intercept(svm, INTERCEPT_DR1_WRITE);
+       set_dr_intercept(svm, INTERCEPT_DR2_WRITE);
+       set_dr_intercept(svm, INTERCEPT_DR3_WRITE);
+       set_dr_intercept(svm, INTERCEPT_DR4_WRITE);
+       set_dr_intercept(svm, INTERCEPT_DR5_WRITE);
+       set_dr_intercept(svm, INTERCEPT_DR6_WRITE);
+       set_dr_intercept(svm, INTERCEPT_DR7_WRITE);
+
+       set_exception_intercept(svm, PF_VECTOR);
+       set_exception_intercept(svm, UD_VECTOR);
+       set_exception_intercept(svm, MC_VECTOR);
+
+       set_intercept(svm, INTERCEPT_INTR);
+       set_intercept(svm, INTERCEPT_NMI);
+       set_intercept(svm, INTERCEPT_SMI);
+       set_intercept(svm, INTERCEPT_SELECTIVE_CR0);
+       set_intercept(svm, INTERCEPT_CPUID);
+       set_intercept(svm, INTERCEPT_INVD);
+       set_intercept(svm, INTERCEPT_HLT);
+       set_intercept(svm, INTERCEPT_INVLPG);
+       set_intercept(svm, INTERCEPT_INVLPGA);
+       set_intercept(svm, INTERCEPT_IOIO_PROT);
+       set_intercept(svm, INTERCEPT_MSR_PROT);
+       set_intercept(svm, INTERCEPT_TASK_SWITCH);
+       set_intercept(svm, INTERCEPT_SHUTDOWN);
+       set_intercept(svm, INTERCEPT_VMRUN);
+       set_intercept(svm, INTERCEPT_VMMCALL);
+       set_intercept(svm, INTERCEPT_VMLOAD);
+       set_intercept(svm, INTERCEPT_VMSAVE);
+       set_intercept(svm, INTERCEPT_STGI);
+       set_intercept(svm, INTERCEPT_CLGI);
+       set_intercept(svm, INTERCEPT_SKINIT);
+       set_intercept(svm, INTERCEPT_WBINVD);
+       set_intercept(svm, INTERCEPT_MONITOR);
+       set_intercept(svm, INTERCEPT_MWAIT);
+       set_intercept(svm, INTERCEPT_XSETBV);
 
        control->iopm_base_pa = iopm_base;
        control->msrpm_base_pa = __pa(svm->msrpm);
@@ -862,25 +992,27 @@ static void init_vmcb(struct vcpu_svm *svm)
        if (npt_enabled) {
                /* Setup VMCB for Nested Paging */
                control->nested_ctl = 1;
-               control->intercept &= ~((1ULL << INTERCEPT_TASK_SWITCH) |
-                                       (1ULL << INTERCEPT_INVLPG));
-               control->intercept_exceptions &= ~(1 << PF_VECTOR);
-               control->intercept_cr_read &= ~INTERCEPT_CR3_MASK;
-               control->intercept_cr_write &= ~INTERCEPT_CR3_MASK;
+               clr_intercept(svm, INTERCEPT_TASK_SWITCH);
+               clr_intercept(svm, INTERCEPT_INVLPG);
+               clr_exception_intercept(svm, PF_VECTOR);
+               clr_cr_intercept(svm, INTERCEPT_CR3_READ);
+               clr_cr_intercept(svm, INTERCEPT_CR3_WRITE);
                save->g_pat = 0x0007040600070406ULL;
                save->cr3 = 0;
                save->cr4 = 0;
        }
-       force_new_asid(&svm->vcpu);
+       svm->asid_generation = 0;
 
        svm->nested.vmcb = 0;
        svm->vcpu.arch.hflags = 0;
 
-       if (svm_has(SVM_FEATURE_PAUSE_FILTER)) {
+       if (boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
                control->pause_filter_count = 3000;
-               control->intercept |= (1ULL << INTERCEPT_PAUSE);
+               set_intercept(svm, INTERCEPT_PAUSE);
        }
 
+       mark_all_dirty(svm->vmcb);
+
        enable_gif(svm);
 }
 
@@ -997,6 +1129,7 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
        if (unlikely(cpu != vcpu->cpu)) {
                svm->asid_generation = 0;
+               mark_all_dirty(svm->vmcb);
        }
 
 #ifdef CONFIG_X86_64
@@ -1019,8 +1152,8 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu)
        kvm_load_ldt(svm->host.ldt);
 #ifdef CONFIG_X86_64
        loadsegment(fs, svm->host.fs);
-       load_gs_index(svm->host.gs);
        wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs);
+       load_gs_index(svm->host.gs);
 #else
        loadsegment(gs, svm->host.gs);
 #endif
@@ -1043,7 +1176,7 @@ static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
        switch (reg) {
        case VCPU_EXREG_PDPTR:
                BUG_ON(!npt_enabled);
-               load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3);
+               load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
                break;
        default:
                BUG();
@@ -1052,12 +1185,12 @@ static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
 
 static void svm_set_vintr(struct vcpu_svm *svm)
 {
-       svm->vmcb->control.intercept |= 1ULL << INTERCEPT_VINTR;
+       set_intercept(svm, INTERCEPT_VINTR);
 }
 
 static void svm_clear_vintr(struct vcpu_svm *svm)
 {
-       svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR);
+       clr_intercept(svm, INTERCEPT_VINTR);
 }
 
 static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg)
@@ -1172,6 +1305,7 @@ static void svm_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
 
        svm->vmcb->save.idtr.limit = dt->size;
        svm->vmcb->save.idtr.base = dt->address;
+       mark_dirty(svm->vmcb, VMCB_DT);
 }
 
 static void svm_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
@@ -1188,19 +1322,23 @@ static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
 
        svm->vmcb->save.gdtr.limit = dt->size;
        svm->vmcb->save.gdtr.base = dt->address;
+       mark_dirty(svm->vmcb, VMCB_DT);
 }
 
 static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
 {
 }
 
+static void svm_decache_cr3(struct kvm_vcpu *vcpu)
+{
+}
+
 static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
 {
 }
 
 static void update_cr0_intercept(struct vcpu_svm *svm)
 {
-       struct vmcb *vmcb = svm->vmcb;
        ulong gcr0 = svm->vcpu.arch.cr0;
        u64 *hcr0 = &svm->vmcb->save.cr0;
 
@@ -1210,27 +1348,14 @@ static void update_cr0_intercept(struct vcpu_svm *svm)
                *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
                        | (gcr0 & SVM_CR0_SELECTIVE_MASK);
 
+       mark_dirty(svm->vmcb, VMCB_CR);
 
        if (gcr0 == *hcr0 && svm->vcpu.fpu_active) {
-               vmcb->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK;
-               vmcb->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK;
-               if (is_nested(svm)) {
-                       struct vmcb *hsave = svm->nested.hsave;
-
-                       hsave->control.intercept_cr_read  &= ~INTERCEPT_CR0_MASK;
-                       hsave->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK;
-                       vmcb->control.intercept_cr_read  |= svm->nested.intercept_cr_read;
-                       vmcb->control.intercept_cr_write |= svm->nested.intercept_cr_write;
-               }
+               clr_cr_intercept(svm, INTERCEPT_CR0_READ);
+               clr_cr_intercept(svm, INTERCEPT_CR0_WRITE);
        } else {
-               svm->vmcb->control.intercept_cr_read |= INTERCEPT_CR0_MASK;
-               svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR0_MASK;
-               if (is_nested(svm)) {
-                       struct vmcb *hsave = svm->nested.hsave;
-
-                       hsave->control.intercept_cr_read |= INTERCEPT_CR0_MASK;
-                       hsave->control.intercept_cr_write |= INTERCEPT_CR0_MASK;
-               }
+               set_cr_intercept(svm, INTERCEPT_CR0_READ);
+               set_cr_intercept(svm, INTERCEPT_CR0_WRITE);
        }
 }
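
Outside SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP in this code base, the lazy-FPU bits) the hardware CR0 must track the guest view; once the two views agree and the FPU is active, the plain CR0 intercepts can be dropped (the selective CR0-write intercept stays). A model of the hcr0 composition, using the architectural TS/MP bit positions:

    #include <assert.h>

    #define CR0_MP (1ul << 1)
    #define CR0_TS (1ul << 3)
    #define SEL_MASK (CR0_TS | CR0_MP)

    int main(void)
    {
            unsigned long gcr0 = 0x80050033ul;      /* guest view */
            unsigned long hcr0 = gcr0 | CR0_TS;     /* host forced TS */

            hcr0 = (hcr0 & ~SEL_MASK) | (gcr0 & SEL_MASK);
            assert(hcr0 == gcr0);   /* views agree: drop CR0 intercepts */
            return 0;
    }
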
 
@@ -1238,7 +1363,7 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
 
-       if (is_nested(svm)) {
+       if (is_guest_mode(vcpu)) {
                /*
                 * We are here because we run in nested mode, the host kvm
                 * intercepts cr0 writes but the l1 hypervisor does not.
@@ -1290,6 +1415,7 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
         */
        cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
        svm->vmcb->save.cr0 = cr0;
+       mark_dirty(svm->vmcb, VMCB_CR);
        update_cr0_intercept(svm);
 }
 
@@ -1299,13 +1425,14 @@ static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
        unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4;
 
        if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE))
-               force_new_asid(vcpu);
+               svm_flush_tlb(vcpu);
 
        vcpu->arch.cr4 = cr4;
        if (!npt_enabled)
                cr4 |= X86_CR4_PAE;
        cr4 |= host_cr4_mce;
        to_svm(vcpu)->vmcb->save.cr4 = cr4;
+       mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
 }
 
 static void svm_set_segment(struct kvm_vcpu *vcpu,
@@ -1334,26 +1461,25 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
                        = (svm->vmcb->save.cs.attrib
                           >> SVM_SELECTOR_DPL_SHIFT) & 3;
 
+       mark_dirty(svm->vmcb, VMCB_SEG);
 }
 
 static void update_db_intercept(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
 
-       svm->vmcb->control.intercept_exceptions &=
-               ~((1 << DB_VECTOR) | (1 << BP_VECTOR));
+       clr_exception_intercept(svm, DB_VECTOR);
+       clr_exception_intercept(svm, BP_VECTOR);
 
        if (svm->nmi_singlestep)
-               svm->vmcb->control.intercept_exceptions |= (1 << DB_VECTOR);
+               set_exception_intercept(svm, DB_VECTOR);
 
        if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
                if (vcpu->guest_debug &
                    (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
-                       svm->vmcb->control.intercept_exceptions |=
-                               1 << DB_VECTOR;
+                       set_exception_intercept(svm, DB_VECTOR);
                if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
-                       svm->vmcb->control.intercept_exceptions |=
-                               1 << BP_VECTOR;
+                       set_exception_intercept(svm, BP_VECTOR);
        } else
                vcpu->guest_debug = 0;
 }
@@ -1367,6 +1493,8 @@ static void svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
        else
                svm->vmcb->save.dr7 = vcpu->arch.dr7;
 
+       mark_dirty(svm->vmcb, VMCB_DR);
+
        update_db_intercept(vcpu);
 }
 
@@ -1380,6 +1508,8 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
 
        svm->asid_generation = sd->asid_generation;
        svm->vmcb->control.asid = sd->next_asid++;
+
+       mark_dirty(svm->vmcb, VMCB_ASID);
 }
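
The ASID scheme is generational: each physical CPU keeps a generation counter and hands out ASIDs from next_asid; a vcpu whose recorded generation no longer matches (after CPU migration, or after the svm_flush_tlb() fallback decrements it) gets a fresh ASID on the next entry. A userspace model of that handshake:

    #include <assert.h>
    #include <stdint.h>

    struct cpu  { uint64_t gen; uint32_t next_asid; };
    struct vcpu { uint64_t gen; uint32_t asid; };

    static void new_asid(struct vcpu *v, struct cpu *c)
    {
            v->gen  = c->gen;
            v->asid = c->next_asid++;
    }

    int main(void)
    {
            struct cpu  c = { .gen = 1, .next_asid = 1 };
            struct vcpu v = { 0 };

            new_asid(&v, &c);
            v.gen--;                        /* request a TLB flush */
            assert(v.gen != c.gen);         /* pre_svm_run() sees mismatch */
            new_asid(&v, &c);
            assert(v.asid == 2);
            return 0;
    }
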
 
 static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
@@ -1387,6 +1517,7 @@ static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
        struct vcpu_svm *svm = to_svm(vcpu);
 
        svm->vmcb->save.dr7 = value;
+       mark_dirty(svm->vmcb, VMCB_DR);
 }
 
 static int pf_interception(struct vcpu_svm *svm)
@@ -1402,7 +1533,9 @@ static int pf_interception(struct vcpu_svm *svm)
                trace_kvm_page_fault(fault_address, error_code);
                if (!npt_enabled && kvm_event_needs_reinjection(&svm->vcpu))
                        kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
-               r = kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
+               r = kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code,
+                       svm->vmcb->control.insn_bytes,
+                       svm->vmcb->control.insn_len);
                break;
        case KVM_PV_REASON_PAGE_NOT_PRESENT:
                svm->apf_reason = 0;
@@ -1465,7 +1598,7 @@ static int ud_interception(struct vcpu_svm *svm)
 {
        int er;
 
-       er = emulate_instruction(&svm->vcpu, 0, 0, EMULTYPE_TRAP_UD);
+       er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD);
        if (er != EMULATE_DONE)
                kvm_queue_exception(&svm->vcpu, UD_VECTOR);
        return 1;
@@ -1474,21 +1607,8 @@ static int ud_interception(struct vcpu_svm *svm)
 static void svm_fpu_activate(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
-       u32 excp;
 
-       if (is_nested(svm)) {
-               u32 h_excp, n_excp;
-
-               h_excp  = svm->nested.hsave->control.intercept_exceptions;
-               n_excp  = svm->nested.intercept_exceptions;
-               h_excp &= ~(1 << NM_VECTOR);
-               excp    = h_excp | n_excp;
-       } else {
-               excp  = svm->vmcb->control.intercept_exceptions;
-               excp &= ~(1 << NM_VECTOR);
-       }
-
-       svm->vmcb->control.intercept_exceptions = excp;
+       clr_exception_intercept(svm, NM_VECTOR);
 
        svm->vcpu.fpu_active = 1;
        update_cr0_intercept(svm);
@@ -1595,7 +1715,7 @@ static int io_interception(struct vcpu_svm *svm)
        string = (io_info & SVM_IOIO_STR_MASK) != 0;
        in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
        if (string || in)
-               return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE;
+               return emulate_instruction(vcpu, 0) == EMULATE_DONE;
 
        port = io_info >> 16;
        size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
@@ -1649,17 +1769,19 @@ static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu,
        struct vcpu_svm *svm = to_svm(vcpu);
 
        svm->vmcb->control.nested_cr3 = root;
-       force_new_asid(vcpu);
+       mark_dirty(svm->vmcb, VMCB_NPT);
+       svm_flush_tlb(vcpu);
 }
 
-static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu)
+static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
+                                      struct x86_exception *fault)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
 
        svm->vmcb->control.exit_code = SVM_EXIT_NPF;
        svm->vmcb->control.exit_code_hi = 0;
-       svm->vmcb->control.exit_info_1 = vcpu->arch.fault.error_code;
-       svm->vmcb->control.exit_info_2 = vcpu->arch.fault.address;
+       svm->vmcb->control.exit_info_1 = fault->error_code;
+       svm->vmcb->control.exit_info_2 = fault->address;
 
        nested_svm_vmexit(svm);
 }
@@ -1705,7 +1827,7 @@ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
 {
        int vmexit;
 
-       if (!is_nested(svm))
+       if (!is_guest_mode(&svm->vcpu))
                return 0;
 
        svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
@@ -1723,7 +1845,7 @@ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
 /* This function returns true if it is safe to enable the irq window */
 static inline bool nested_svm_intr(struct vcpu_svm *svm)
 {
-       if (!is_nested(svm))
+       if (!is_guest_mode(&svm->vcpu))
                return true;
 
        if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
@@ -1762,7 +1884,7 @@ static inline bool nested_svm_intr(struct vcpu_svm *svm)
 /* This function returns true if it is safe to enable the nmi window */
 static inline bool nested_svm_nmi(struct vcpu_svm *svm)
 {
-       if (!is_nested(svm))
+       if (!is_guest_mode(&svm->vcpu))
                return true;
 
        if (!(svm->nested.intercept & (1ULL << INTERCEPT_NMI)))
@@ -1890,27 +2012,15 @@ static int nested_svm_intercept(struct vcpu_svm *svm)
        case SVM_EXIT_IOIO:
                vmexit = nested_svm_intercept_ioio(svm);
                break;
-       case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR8: {
-               u32 cr_bits = 1 << (exit_code - SVM_EXIT_READ_CR0);
-               if (svm->nested.intercept_cr_read & cr_bits)
+       case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: {
+               u32 bit = 1U << (exit_code - SVM_EXIT_READ_CR0);
+               if (svm->nested.intercept_cr & bit)
                        vmexit = NESTED_EXIT_DONE;
                break;
        }
-       case SVM_EXIT_WRITE_CR0 ... SVM_EXIT_WRITE_CR8: {
-               u32 cr_bits = 1 << (exit_code - SVM_EXIT_WRITE_CR0);
-               if (svm->nested.intercept_cr_write & cr_bits)
-                       vmexit = NESTED_EXIT_DONE;
-               break;
-       }
-       case SVM_EXIT_READ_DR0 ... SVM_EXIT_READ_DR7: {
-               u32 dr_bits = 1 << (exit_code - SVM_EXIT_READ_DR0);
-               if (svm->nested.intercept_dr_read & dr_bits)
-                       vmexit = NESTED_EXIT_DONE;
-               break;
-       }
-       case SVM_EXIT_WRITE_DR0 ... SVM_EXIT_WRITE_DR7: {
-               u32 dr_bits = 1 << (exit_code - SVM_EXIT_WRITE_DR0);
-               if (svm->nested.intercept_dr_write & dr_bits)
+       case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: {
+               u32 bit = 1U << (exit_code - SVM_EXIT_READ_DR0);
+               if (svm->nested.intercept_dr & bit)
                        vmexit = NESTED_EXIT_DONE;
                break;
        }
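
Folding the separate read/write u16 vectors into one u32 works because the SVM exit codes are contiguous: CR reads occupy exit codes 0x00-0x0f and CR writes 0x10-0x1f (DR likewise in their own range), matching bit positions 0-15 and 16-31 of the combined word. A sketch of the indexing:

    #include <assert.h>
    #include <stdint.h>

    #define EXIT_READ_CR0  0x00u
    #define EXIT_WRITE_CR0 0x10u

    static int caught(uint32_t vec, uint32_t exit_code)
    {
            return !!(vec & (1u << (exit_code - EXIT_READ_CR0)));
    }

    int main(void)
    {
            uint32_t vec = (1u << 0)             /* CR0 read  */
                         | (1u << (16 + 8));     /* CR8 write */

            assert(caught(vec, EXIT_READ_CR0));
            assert(caught(vec, EXIT_WRITE_CR0 + 8));
            assert(!caught(vec, EXIT_WRITE_CR0));
            return 0;
    }
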
@@ -1955,10 +2065,8 @@ static inline void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *fr
        struct vmcb_control_area *dst  = &dst_vmcb->control;
        struct vmcb_control_area *from = &from_vmcb->control;
 
-       dst->intercept_cr_read    = from->intercept_cr_read;
-       dst->intercept_cr_write   = from->intercept_cr_write;
-       dst->intercept_dr_read    = from->intercept_dr_read;
-       dst->intercept_dr_write   = from->intercept_dr_write;
+       dst->intercept_cr         = from->intercept_cr;
+       dst->intercept_dr         = from->intercept_dr;
        dst->intercept_exceptions = from->intercept_exceptions;
        dst->intercept            = from->intercept;
        dst->iopm_base_pa         = from->iopm_base_pa;
@@ -1999,7 +2107,8 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
        if (!nested_vmcb)
                return 1;
 
-       /* Exit nested SVM mode */
+       /* Exit Guest-Mode */
+       leave_guest_mode(&svm->vcpu);
        svm->nested.vmcb = 0;
 
        /* Give the current vmcb to the guest */
@@ -2013,7 +2122,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
        nested_vmcb->save.idtr   = vmcb->save.idtr;
        nested_vmcb->save.efer   = svm->vcpu.arch.efer;
        nested_vmcb->save.cr0    = kvm_read_cr0(&svm->vcpu);
-       nested_vmcb->save.cr3    = svm->vcpu.arch.cr3;
+       nested_vmcb->save.cr3    = kvm_read_cr3(&svm->vcpu);
        nested_vmcb->save.cr2    = vmcb->save.cr2;
        nested_vmcb->save.cr4    = svm->vcpu.arch.cr4;
        nested_vmcb->save.rflags = vmcb->save.rflags;
@@ -2090,6 +2199,8 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
        svm->vmcb->save.cpl = 0;
        svm->vmcb->control.exit_int_info = 0;
 
+       mark_all_dirty(svm->vmcb);
+
        nested_svm_unmap(page);
 
        nested_svm_uninit_mmu_context(&svm->vcpu);
@@ -2177,8 +2288,8 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
                               nested_vmcb->control.event_inj,
                               nested_vmcb->control.nested_ctl);
 
-       trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr_read,
-                                   nested_vmcb->control.intercept_cr_write,
+       trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr & 0xffff,
+                                   nested_vmcb->control.intercept_cr >> 16,
                                    nested_vmcb->control.intercept_exceptions,
                                    nested_vmcb->control.intercept);
 
@@ -2206,7 +2317,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
        if (npt_enabled)
                hsave->save.cr3    = vmcb->save.cr3;
        else
-               hsave->save.cr3    = svm->vcpu.arch.cr3;
+               hsave->save.cr3    = kvm_read_cr3(&svm->vcpu);
 
        copy_vmcb_control_area(hsave, vmcb);
 
@@ -2258,14 +2369,12 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
        svm->nested.vmcb_iopm  = nested_vmcb->control.iopm_base_pa  & ~0x0fffULL;
 
        /* cache intercepts */
-       svm->nested.intercept_cr_read    = nested_vmcb->control.intercept_cr_read;
-       svm->nested.intercept_cr_write   = nested_vmcb->control.intercept_cr_write;
-       svm->nested.intercept_dr_read    = nested_vmcb->control.intercept_dr_read;
-       svm->nested.intercept_dr_write   = nested_vmcb->control.intercept_dr_write;
+       svm->nested.intercept_cr         = nested_vmcb->control.intercept_cr;
+       svm->nested.intercept_dr         = nested_vmcb->control.intercept_dr;
        svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions;
        svm->nested.intercept            = nested_vmcb->control.intercept;
 
-       force_new_asid(&svm->vcpu);
+       svm_flush_tlb(&svm->vcpu);
        svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
        if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
                svm->vcpu.arch.hflags |= HF_VINTR_MASK;
@@ -2274,29 +2383,12 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
 
        if (svm->vcpu.arch.hflags & HF_VINTR_MASK) {
                /* We only want the cr8 intercept bits of the guest */
-               svm->vmcb->control.intercept_cr_read &= ~INTERCEPT_CR8_MASK;
-               svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK;
+               clr_cr_intercept(svm, INTERCEPT_CR8_READ);
+               clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
        }
 
        /* We don't want to see VMMCALLs from a nested guest */
-       svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VMMCALL);
-
-       /*
-        * We don't want a nested guest to be more powerful than the guest, so
-        * all intercepts are ORed
-        */
-       svm->vmcb->control.intercept_cr_read |=
-               nested_vmcb->control.intercept_cr_read;
-       svm->vmcb->control.intercept_cr_write |=
-               nested_vmcb->control.intercept_cr_write;
-       svm->vmcb->control.intercept_dr_read |=
-               nested_vmcb->control.intercept_dr_read;
-       svm->vmcb->control.intercept_dr_write |=
-               nested_vmcb->control.intercept_dr_write;
-       svm->vmcb->control.intercept_exceptions |=
-               nested_vmcb->control.intercept_exceptions;
-
-       svm->vmcb->control.intercept |= nested_vmcb->control.intercept;
+       clr_intercept(svm, INTERCEPT_VMMCALL);
 
        svm->vmcb->control.lbr_ctl = nested_vmcb->control.lbr_ctl;
        svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
@@ -2307,11 +2399,21 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
 
        nested_svm_unmap(page);
 
-       /* nested_vmcb is our indicator if nested SVM is activated */
+       /* Enter Guest-Mode */
+       enter_guest_mode(&svm->vcpu);
+
+       /*
+        * Merge guest and host intercepts - must be called with vcpu in
+        * guest-mode to take effect here
+        */
+       recalc_intercepts(svm);
+
        svm->nested.vmcb = vmcb_gpa;
 
        enable_gif(svm);
 
+       mark_all_dirty(svm->vmcb);
+
        return true;
 }
 
@@ -2429,6 +2531,8 @@ static int clgi_interception(struct vcpu_svm *svm)
        svm_clear_vintr(svm);
        svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
 
+       mark_dirty(svm->vmcb, VMCB_INTR);
+
        return 1;
 }
 
@@ -2455,6 +2559,19 @@ static int skinit_interception(struct vcpu_svm *svm)
        return 1;
 }
 
+static int xsetbv_interception(struct vcpu_svm *svm)
+{
+       u64 new_bv = kvm_read_edx_eax(&svm->vcpu);
+       u32 index = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
+
+       if (kvm_set_xcr(&svm->vcpu, index, new_bv) == 0) {
+               svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
+               skip_emulated_instruction(&svm->vcpu);
+       }
+
+       return 1;
+}
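
XSETBV takes the XCR index in ECX and the new value in EDX:EAX; kvm_read_edx_eax() assembles the 64-bit value, and since the instruction encoding (0F 01 D1) is three bytes, next_rip is advanced by 3. A trivial model of the operand assembly:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
            uint32_t ecx = 0, eax = 0x7, edx = 0;   /* XCR0 = x87|SSE|AVX */
            uint64_t new_bv = ((uint64_t)edx << 32) | eax;

            assert(ecx == 0 && new_bv == 0x7);
            return 0;
    }
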
+
 static int invalid_op_interception(struct vcpu_svm *svm)
 {
        kvm_queue_exception(&svm->vcpu, UD_VECTOR);
@@ -2536,19 +2653,93 @@ static int cpuid_interception(struct vcpu_svm *svm)
 static int iret_interception(struct vcpu_svm *svm)
 {
        ++svm->vcpu.stat.nmi_window_exits;
-       svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_IRET);
+       clr_intercept(svm, INTERCEPT_IRET);
        svm->vcpu.arch.hflags |= HF_IRET_MASK;
+       svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu);
        return 1;
 }
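
This is the core of the patch: the IRET intercept fires before the instruction retires, so HF_IRET_MASK alone is not proof the guest has left its NMI handler. nmi_iret_rip records where the intercept hit, and svm_complete_interrupts() (further down) only unmasks NMIs once RIP has moved past it. A compact model of that check:

    #include <assert.h>
    #include <stdint.h>

    struct vcpu { uint64_t rip, nmi_iret_rip; int hf_iret, nmi_masked; };

    static void complete_interrupts(struct vcpu *v)
    {
            if (v->hf_iret && v->rip != v->nmi_iret_rip) {
                    v->nmi_masked = 0;      /* the IRET really executed */
                    v->hf_iret = 0;
            }
    }

    int main(void)
    {
            struct vcpu v = { .rip = 0x1000, .nmi_iret_rip = 0x1000,
                              .hf_iret = 1, .nmi_masked = 1 };

            complete_interrupts(&v);
            assert(v.nmi_masked);           /* no progress: keep masked */

            v.rip = 0x2000;                 /* guest executed the IRET */
            complete_interrupts(&v);
            assert(!v.nmi_masked);
            return 0;
    }
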
 
 static int invlpg_interception(struct vcpu_svm *svm)
 {
-       return emulate_instruction(&svm->vcpu, 0, 0, 0) == EMULATE_DONE;
+       if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
+               return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
+
+       kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1);
+       skip_emulated_instruction(&svm->vcpu);
+       return 1;
 }
 
 static int emulate_on_interception(struct vcpu_svm *svm)
 {
-       return emulate_instruction(&svm->vcpu, 0, 0, 0) == EMULATE_DONE;
+       return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
+}
+
+#define CR_VALID (1ULL << 63)
+
+static int cr_interception(struct vcpu_svm *svm)
+{
+       int reg, cr;
+       unsigned long val;
+       int err;
+
+       if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
+               return emulate_on_interception(svm);
+
+       if (unlikely((svm->vmcb->control.exit_info_1 & CR_VALID) == 0))
+               return emulate_on_interception(svm);
+
+       reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK;
+       cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0;
+
+       err = 0;
+       if (cr >= 16) { /* mov to cr */
+               cr -= 16;
+               val = kvm_register_read(&svm->vcpu, reg);
+               switch (cr) {
+               case 0:
+                       err = kvm_set_cr0(&svm->vcpu, val);
+                       break;
+               case 3:
+                       err = kvm_set_cr3(&svm->vcpu, val);
+                       break;
+               case 4:
+                       err = kvm_set_cr4(&svm->vcpu, val);
+                       break;
+               case 8:
+                       err = kvm_set_cr8(&svm->vcpu, val);
+                       break;
+               default:
+                       WARN(1, "unhandled write to CR%d", cr);
+                       kvm_queue_exception(&svm->vcpu, UD_VECTOR);
+                       return 1;
+               }
+       } else { /* mov from cr */
+               switch (cr) {
+               case 0:
+                       val = kvm_read_cr0(&svm->vcpu);
+                       break;
+               case 2:
+                       val = svm->vcpu.arch.cr2;
+                       break;
+               case 3:
+                       val = kvm_read_cr3(&svm->vcpu);
+                       break;
+               case 4:
+                       val = kvm_read_cr4(&svm->vcpu);
+                       break;
+               case 8:
+                       val = kvm_get_cr8(&svm->vcpu);
+                       break;
+               default:
+                       WARN(1, "unhandled read from CR%d", cr);
+                       kvm_queue_exception(&svm->vcpu, UD_VECTOR);
+                       return 1;
+               }
+               kvm_register_write(&svm->vcpu, reg, val);
+       }
+       kvm_complete_insn_gp(&svm->vcpu, err);
+
+       return 1;
 }
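
With decode assists the hardware pre-decodes MOV-CR: bit 63 of exit_info_1 (CR_VALID) says the decode is usable and the low bits name the GPR operand (SVM_EXITINFO_REG_MASK), so the full x86 emulator can be bypassed. A sketch of the unpacking:

    #include <assert.h>
    #include <stdint.h>

    #define CR_VALID  (1ull << 63)
    #define REG_MASK  0x0full               /* SVM_EXITINFO_REG_MASK */

    int main(void)
    {
            uint64_t exit_info_1 = CR_VALID | 0x3;  /* e.g. mov %rbx,%cr0 */

            assert(exit_info_1 & CR_VALID);
            assert((exit_info_1 & REG_MASK) == 3);  /* GPR 3 = RBX */
            return 0;
    }
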
 
 static int cr0_write_interception(struct vcpu_svm *svm)
@@ -2556,7 +2747,7 @@ static int cr0_write_interception(struct vcpu_svm *svm)
        struct kvm_vcpu *vcpu = &svm->vcpu;
        int r;
 
-       r = emulate_instruction(&svm->vcpu, 0, 0, 0);
+       r = cr_interception(svm);
 
        if (svm->nested.vmexit_rip) {
                kvm_register_write(vcpu, VCPU_REGS_RIP, svm->nested.vmexit_rip);
@@ -2565,22 +2756,49 @@ static int cr0_write_interception(struct vcpu_svm *svm)
                svm->nested.vmexit_rip = 0;
        }
 
-       return r == EMULATE_DONE;
+       return r;
+}
+
+static int dr_interception(struct vcpu_svm *svm)
+{
+       int reg, dr;
+       unsigned long val;
+       int err;
+
+       if (!boot_cpu_has(X86_FEATURE_DECODEASSISTS))
+               return emulate_on_interception(svm);
+
+       reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK;
+       dr = svm->vmcb->control.exit_code - SVM_EXIT_READ_DR0;
+
+       if (dr >= 16) { /* mov to DRn */
+               val = kvm_register_read(&svm->vcpu, reg);
+               kvm_set_dr(&svm->vcpu, dr - 16, val);
+       } else {
+               err = kvm_get_dr(&svm->vcpu, dr, &val);
+               if (!err)
+                       kvm_register_write(&svm->vcpu, reg, val);
+       }
+
+       skip_emulated_instruction(&svm->vcpu);
+
+       return 1;
 }
 
 static int cr8_write_interception(struct vcpu_svm *svm)
 {
        struct kvm_run *kvm_run = svm->vcpu.run;
+       int r;
 
        u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
        /* instruction emulation calls kvm_set_cr8() */
-       emulate_instruction(&svm->vcpu, 0, 0, 0);
+       r = cr_interception(svm);
        if (irqchip_in_kernel(svm->vcpu.kvm)) {
-               svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK;
-               return 1;
+               clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
+               return r;
        }
        if (cr8_prev <= kvm_get_cr8(&svm->vcpu))
-               return 1;
+               return r;
        kvm_run->exit_reason = KVM_EXIT_SET_TPR;
        return 0;
 }
@@ -2591,14 +2809,9 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
 
        switch (ecx) {
        case MSR_IA32_TSC: {
-               u64 tsc_offset;
+               struct vmcb *vmcb = get_host_vmcb(svm);
 
-               if (is_nested(svm))
-                       tsc_offset = svm->nested.hsave->control.tsc_offset;
-               else
-                       tsc_offset = svm->vmcb->control.tsc_offset;
-
-               *data = tsc_offset + native_read_tsc();
+               *data = vmcb->control.tsc_offset + native_read_tsc();
                break;
        }
        case MSR_STAR:
@@ -2743,7 +2956,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
                svm->vmcb->save.sysenter_esp = data;
                break;
        case MSR_IA32_DEBUGCTLMSR:
-               if (!svm_has(SVM_FEATURE_LBRV)) {
+               if (!boot_cpu_has(X86_FEATURE_LBRV)) {
                        pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n",
                                        __func__, data);
                        break;
@@ -2752,6 +2965,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
                        return 1;
 
                svm->vmcb->save.dbgctl = data;
+               mark_dirty(svm->vmcb, VMCB_LBR);
                if (data & (1ULL<<0))
                        svm_enable_lbrv(svm);
                else
@@ -2804,6 +3018,7 @@ static int interrupt_window_interception(struct vcpu_svm *svm)
        kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
        svm_clear_vintr(svm);
        svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
+       mark_dirty(svm->vmcb, VMCB_INTR);
        /*
         * If user space is waiting to inject interrupts, exit as soon
         * as possible
@@ -2826,31 +3041,31 @@ static int pause_interception(struct vcpu_svm *svm)
 }
 
 static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
-       [SVM_EXIT_READ_CR0]                     = emulate_on_interception,
-       [SVM_EXIT_READ_CR3]                     = emulate_on_interception,
-       [SVM_EXIT_READ_CR4]                     = emulate_on_interception,
-       [SVM_EXIT_READ_CR8]                     = emulate_on_interception,
+       [SVM_EXIT_READ_CR0]                     = cr_interception,
+       [SVM_EXIT_READ_CR3]                     = cr_interception,
+       [SVM_EXIT_READ_CR4]                     = cr_interception,
+       [SVM_EXIT_READ_CR8]                     = cr_interception,
        [SVM_EXIT_CR0_SEL_WRITE]                = emulate_on_interception,
        [SVM_EXIT_WRITE_CR0]                    = cr0_write_interception,
-       [SVM_EXIT_WRITE_CR3]                    = emulate_on_interception,
-       [SVM_EXIT_WRITE_CR4]                    = emulate_on_interception,
+       [SVM_EXIT_WRITE_CR3]                    = cr_interception,
+       [SVM_EXIT_WRITE_CR4]                    = cr_interception,
        [SVM_EXIT_WRITE_CR8]                    = cr8_write_interception,
-       [SVM_EXIT_READ_DR0]                     = emulate_on_interception,
-       [SVM_EXIT_READ_DR1]                     = emulate_on_interception,
-       [SVM_EXIT_READ_DR2]                     = emulate_on_interception,
-       [SVM_EXIT_READ_DR3]                     = emulate_on_interception,
-       [SVM_EXIT_READ_DR4]                     = emulate_on_interception,
-       [SVM_EXIT_READ_DR5]                     = emulate_on_interception,
-       [SVM_EXIT_READ_DR6]                     = emulate_on_interception,
-       [SVM_EXIT_READ_DR7]                     = emulate_on_interception,
-       [SVM_EXIT_WRITE_DR0]                    = emulate_on_interception,
-       [SVM_EXIT_WRITE_DR1]                    = emulate_on_interception,
-       [SVM_EXIT_WRITE_DR2]                    = emulate_on_interception,
-       [SVM_EXIT_WRITE_DR3]                    = emulate_on_interception,
-       [SVM_EXIT_WRITE_DR4]                    = emulate_on_interception,
-       [SVM_EXIT_WRITE_DR5]                    = emulate_on_interception,
-       [SVM_EXIT_WRITE_DR6]                    = emulate_on_interception,
-       [SVM_EXIT_WRITE_DR7]                    = emulate_on_interception,
+       [SVM_EXIT_READ_DR0]                     = dr_interception,
+       [SVM_EXIT_READ_DR1]                     = dr_interception,
+       [SVM_EXIT_READ_DR2]                     = dr_interception,
+       [SVM_EXIT_READ_DR3]                     = dr_interception,
+       [SVM_EXIT_READ_DR4]                     = dr_interception,
+       [SVM_EXIT_READ_DR5]                     = dr_interception,
+       [SVM_EXIT_READ_DR6]                     = dr_interception,
+       [SVM_EXIT_READ_DR7]                     = dr_interception,
+       [SVM_EXIT_WRITE_DR0]                    = dr_interception,
+       [SVM_EXIT_WRITE_DR1]                    = dr_interception,
+       [SVM_EXIT_WRITE_DR2]                    = dr_interception,
+       [SVM_EXIT_WRITE_DR3]                    = dr_interception,
+       [SVM_EXIT_WRITE_DR4]                    = dr_interception,
+       [SVM_EXIT_WRITE_DR5]                    = dr_interception,
+       [SVM_EXIT_WRITE_DR6]                    = dr_interception,
+       [SVM_EXIT_WRITE_DR7]                    = dr_interception,
        [SVM_EXIT_EXCP_BASE + DB_VECTOR]        = db_interception,
        [SVM_EXIT_EXCP_BASE + BP_VECTOR]        = bp_interception,
        [SVM_EXIT_EXCP_BASE + UD_VECTOR]        = ud_interception,
@@ -2883,6 +3098,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
        [SVM_EXIT_WBINVD]                       = emulate_on_interception,
        [SVM_EXIT_MONITOR]                      = invalid_op_interception,
        [SVM_EXIT_MWAIT]                        = invalid_op_interception,
+       [SVM_EXIT_XSETBV]                       = xsetbv_interception,
        [SVM_EXIT_NPF]                          = pf_interception,
 };
 
@@ -2893,10 +3109,10 @@ void dump_vmcb(struct kvm_vcpu *vcpu)
        struct vmcb_save_area *save = &svm->vmcb->save;
 
        pr_err("VMCB Control Area:\n");
-       pr_err("cr_read:            %04x\n", control->intercept_cr_read);
-       pr_err("cr_write:           %04x\n", control->intercept_cr_write);
-       pr_err("dr_read:            %04x\n", control->intercept_dr_read);
-       pr_err("dr_write:           %04x\n", control->intercept_dr_write);
+       pr_err("cr_read:            %04x\n", control->intercept_cr & 0xffff);
+       pr_err("cr_write:           %04x\n", control->intercept_cr >> 16);
+       pr_err("dr_read:            %04x\n", control->intercept_dr & 0xffff);
+       pr_err("dr_write:           %04x\n", control->intercept_dr >> 16);
        pr_err("exceptions:         %08x\n", control->intercept_exceptions);
        pr_err("intercepts:         %016llx\n", control->intercept);
        pr_err("pause filter count: %d\n", control->pause_filter_count);
@@ -2979,15 +3195,23 @@ void dump_vmcb(struct kvm_vcpu *vcpu)
 
 }
 
+static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
+{
+       struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control;
+
+       *info1 = control->exit_info_1;
+       *info2 = control->exit_info_2;
+}
+
 static int handle_exit(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
        struct kvm_run *kvm_run = vcpu->run;
        u32 exit_code = svm->vmcb->control.exit_code;
 
-       trace_kvm_exit(exit_code, vcpu);
+       trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM);
 
-       if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR0_MASK))
+       if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE))
                vcpu->arch.cr0 = svm->vmcb->save.cr0;
        if (npt_enabled)
                vcpu->arch.cr3 = svm->vmcb->save.cr3;
@@ -2999,7 +3223,7 @@ static int handle_exit(struct kvm_vcpu *vcpu)
                return 1;
        }
 
-       if (is_nested(svm)) {
+       if (is_guest_mode(vcpu)) {
                int vmexit;
 
                trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code,
@@ -3062,7 +3286,6 @@ static void pre_svm_run(struct vcpu_svm *svm)
 
        struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
 
-       svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
        /* FIXME: handle wraparound of asid_generation */
        if (svm->asid_generation != sd->asid_generation)
                new_asid(svm, sd);
@@ -3074,7 +3297,7 @@ static void svm_inject_nmi(struct kvm_vcpu *vcpu)
 
        svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
        vcpu->arch.hflags |= HF_NMI_MASK;
-       svm->vmcb->control.intercept |= (1ULL << INTERCEPT_IRET);
+       set_intercept(svm, INTERCEPT_IRET);
        ++vcpu->stat.nmi_injections;
 }
 
@@ -3087,6 +3310,7 @@ static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
        control->int_ctl &= ~V_INTR_PRIO_MASK;
        control->int_ctl |= V_IRQ_MASK |
                ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT);
+       mark_dirty(svm->vmcb, VMCB_INTR);
 }
 
 static void svm_set_irq(struct kvm_vcpu *vcpu)
@@ -3106,14 +3330,14 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
 
-       if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK))
+       if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK))
                return;
 
        if (irr == -1)
                return;
 
        if (tpr >= irr)
-               svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR8_MASK;
+               set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
 }
 
 static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
@@ -3141,10 +3365,10 @@ static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
 
        if (masked) {
                svm->vcpu.arch.hflags |= HF_NMI_MASK;
-               svm->vmcb->control.intercept |= (1ULL << INTERCEPT_IRET);
+               set_intercept(svm, INTERCEPT_IRET);
        } else {
                svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
-               svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_IRET);
+               clr_intercept(svm, INTERCEPT_IRET);
        }
 }
 
@@ -3160,7 +3384,7 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
 
        ret = !!(vmcb->save.rflags & X86_EFLAGS_IF);
 
-       if (is_nested(svm))
+       if (is_guest_mode(vcpu))
                return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK);
 
        return ret;
@@ -3206,7 +3430,12 @@ static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
 
 static void svm_flush_tlb(struct kvm_vcpu *vcpu)
 {
-       force_new_asid(vcpu);
+       struct vcpu_svm *svm = to_svm(vcpu);
+
+       if (static_cpu_has(X86_FEATURE_FLUSHBYASID))
+               svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
+       else
+               svm->asid_generation--;
 }
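
On FLUSHBYASID-capable hardware a targeted flush is requested through tlb_ctl (reset to TLB_CONTROL_DO_NOTHING after each VMRUN, see svm_vcpu_run below); otherwise the vcpu's ASID generation is bumped so that pre_svm_run() assigns a fresh ASID, which flushes by construction. A model of that split:

    #include <assert.h>

    enum tlb_ctl { TLB_DO_NOTHING = 0, TLB_FLUSH_ASID = 3 };

    struct vcpu { int flush_by_asid; enum tlb_ctl tlb_ctl; long gen; };

    static void flush_tlb(struct vcpu *v)
    {
            if (v->flush_by_asid)
                    v->tlb_ctl = TLB_FLUSH_ASID;
            else
                    v->gen--;               /* forces new_asid() later */
    }

    int main(void)
    {
            struct vcpu v = { .flush_by_asid = 1, .gen = 5 };

            flush_tlb(&v);
            assert(v.tlb_ctl == TLB_FLUSH_ASID && v.gen == 5);

            v.flush_by_asid = 0;
            flush_tlb(&v);
            assert(v.gen == 4);
            return 0;
    }
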
 
 static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
@@ -3217,10 +3446,10 @@ static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
 
-       if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK))
+       if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK))
                return;
 
-       if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) {
+       if (!is_cr_intercept(svm, INTERCEPT_CR8_WRITE)) {
                int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK;
                kvm_set_cr8(vcpu, cr8);
        }
@@ -3231,7 +3460,7 @@ static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu)
        struct vcpu_svm *svm = to_svm(vcpu);
        u64 cr8;
 
-       if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK))
+       if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK))
                return;
 
        cr8 = kvm_get_cr8(vcpu);
@@ -3248,7 +3477,12 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
 
        svm->int3_injected = 0;
 
-       if (svm->vcpu.arch.hflags & HF_IRET_MASK) {
+       /*
+        * If we've made progress since setting HF_IRET_MASK, we've
+        * executed an IRET and can allow NMI injection.
+        */
+       if ((svm->vcpu.arch.hflags & HF_IRET_MASK)
+           && kvm_rip_read(&svm->vcpu) != svm->nmi_iret_rip) {
                svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK);
                kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
        }
@@ -3421,17 +3655,27 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 
        local_irq_disable();
 
-       stgi();
-
        vcpu->arch.cr2 = svm->vmcb->save.cr2;
        vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
        vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
        vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
 
+       if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
+               kvm_before_handle_nmi(&svm->vcpu);
+
+       stgi();
+
+       /* Any pending NMI will happen here */
+
+       if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
+               kvm_after_handle_nmi(&svm->vcpu);
+
        sync_cr8_to_lapic(vcpu);
 
        svm->next_rip = 0;
 
+       svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
+
        /* if exit due to PF check for async PF */
        if (svm->vmcb->control.exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR)
                svm->apf_reason = kvm_read_and_reset_pf_reason();
@@ -3448,6 +3692,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
        if (unlikely(svm->vmcb->control.exit_code ==
                     SVM_EXIT_EXCP_BASE + MC_VECTOR))
                svm_handle_mce(svm);
+
+       mark_all_clean(svm->vmcb);
 }
 
 #undef R
@@ -3457,7 +3703,8 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
        struct vcpu_svm *svm = to_svm(vcpu);
 
        svm->vmcb->save.cr3 = root;
-       force_new_asid(vcpu);
+       mark_dirty(svm->vmcb, VMCB_CR);
+       svm_flush_tlb(vcpu);
 }
 
 static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root)
@@ -3465,11 +3712,13 @@ static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root)
        struct vcpu_svm *svm = to_svm(vcpu);
 
        svm->vmcb->control.nested_cr3 = root;
+       mark_dirty(svm->vmcb, VMCB_NPT);
 
        /* Also sync guest cr3 here in case we live migrate */
-       svm->vmcb->save.cr3 = vcpu->arch.cr3;
+       svm->vmcb->save.cr3 = kvm_read_cr3(vcpu);
+       mark_dirty(svm->vmcb, VMCB_CR);
 
-       force_new_asid(vcpu);
+       svm_flush_tlb(vcpu);
 }
 
 static int is_disabled(void)
@@ -3516,10 +3765,6 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu)
 static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
 {
        switch (func) {
-       case 0x00000001:
-               /* Mask out xsave bit as long as it is not supported by SVM */
-               entry->ecx &= ~(bit(X86_FEATURE_XSAVE));
-               break;
        case 0x80000001:
                if (nested)
                        entry->ecx |= (1 << 2); /* Set SVM bit */
@@ -3533,7 +3778,7 @@ static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
                                   additional features */
 
                /* Support next_rip if host supports it */
-               if (svm_has(SVM_FEATURE_NRIP))
+               if (boot_cpu_has(X86_FEATURE_NRIPS))
                        entry->edx |= SVM_FEATURE_NRIP;
 
                /* Support NPT for the guest if enabled */
@@ -3593,6 +3838,7 @@ static const struct trace_print_flags svm_exit_reasons_str[] = {
        { SVM_EXIT_WBINVD,                      "wbinvd" },
        { SVM_EXIT_MONITOR,                     "monitor" },
        { SVM_EXIT_MWAIT,                       "mwait" },
+       { SVM_EXIT_XSETBV,                      "xsetbv" },
        { SVM_EXIT_NPF,                         "npf" },
        { -1, NULL }
 };
@@ -3616,9 +3862,7 @@ static void svm_fpu_deactivate(struct kvm_vcpu *vcpu)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
 
-       svm->vmcb->control.intercept_exceptions |= 1 << NM_VECTOR;
-       if (is_nested(svm))
-               svm->nested.hsave->control.intercept_exceptions |= 1 << NM_VECTOR;
+       set_exception_intercept(svm, NM_VECTOR);
        update_cr0_intercept(svm);
 }
 
@@ -3649,6 +3893,7 @@ static struct kvm_x86_ops svm_x86_ops = {
        .get_cpl = svm_get_cpl,
        .get_cs_db_l_bits = kvm_get_cs_db_l_bits,
        .decache_cr0_guest_bits = svm_decache_cr0_guest_bits,
+       .decache_cr3 = svm_decache_cr3,
        .decache_cr4_guest_bits = svm_decache_cr4_guest_bits,
        .set_cr0 = svm_set_cr0,
        .set_cr3 = svm_set_cr3,
@@ -3689,7 +3934,9 @@ static struct kvm_x86_ops svm_x86_ops = {
        .get_tdp_level = get_npt_level,
        .get_mt_mask = svm_get_mt_mask,
 
+       .get_exit_info = svm_get_exit_info,
        .exit_reasons_str = svm_exit_reasons_str,
+
        .get_lpage_level = svm_get_lpage_level,
 
        .cpuid_update = svm_cpuid_update,